changeset 3:14ff32f355f0 draft

Uploaded v0.0.2, pair aware bam2fq with pre-sorting
author peterjc
date Tue, 04 Nov 2014 07:11:58 -0500
parents 27135d7637b6
children 8ac1057b1a7d
files test-data/sam_spec_padded.bam2fq_pairs.fastq test-data/sam_spec_padded.bam2fq_singles.fastq tools/samtools_bam2fq/README.rst tools/samtools_bam2fq/samtools_bam2fq.xml
diffstat 4 files changed, 73 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq_pairs.fastq	Tue Nov 04 07:11:58 2014 -0500
@@ -0,0 +1,4 @@
+>r001/1
+TTAGATAAAGGATACTG
+>r001/2
+ATGCCGCTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sam_spec_padded.bam2fq_singles.fastq	Tue Nov 04 07:11:58 2014 -0500
@@ -0,0 +1,8 @@
+>r002
+AAAAGATAAGGATA
+>r003
+AGCTAA
+>r004
+ATAGCTTCAGC
+>ref
+AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
--- a/tools/samtools_bam2fq/README.rst	Tue Nov 04 06:07:53 2014 -0500
+++ b/tools/samtools_bam2fq/README.rst	Tue Nov 04 07:11:58 2014 -0500
@@ -49,6 +49,7 @@
 Version Changes
 ------- ----------------------------------------------------------------------
 v0.0.1  - Initial public release, tested with samtools v1.1.
+v0.0.2  - Defaults to pair-aware mode which requires pre-sorting by read name.
 ======= ======================================================================
 
 
@@ -61,7 +62,7 @@
 For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder::
 
-    $ tar -czf samtools_bam2fq.tar.gz tools/samtools_bam2fq/README.rst tools/samtools_bam2fq/samtools_bam2fq.xml tools/samtools_bam2fq/tool_dependencies.xml test-data/sam_spec_padded.bam test-data/sam_spec_padded.sam test-data/sam_spec_padded.depad.bam test-data/sam_spec_padded.bam2fq.fastq test-data/sam_spec_padded.bam2fq_no_suf.fastq
+    $ tar -czf samtools_bam2fq.tar.gz tools/samtools_bam2fq/README.rst tools/samtools_bam2fq/samtools_bam2fq.xml tools/samtools_bam2fq/tool_dependencies.xml test-data/sam_spec_padded.bam test-data/sam_spec_padded.sam test-data/sam_spec_padded.depad.bam test-data/sam_spec_padded.bam2fq.fastq test-data/sam_spec_padded.bam2fq_no_suf.fastq test-data/sam_spec_padded.bam2fq_singles.fastq test-data/sam_spec_padded.bam2fq_pairs.fastq
 
 Check this worked::
 
@@ -74,6 +75,8 @@
     test-data/sam_spec_padded.depad.bam
     test-data/sam_spec_padded.bam2fq.fastq
     test-data/sam_spec_padded.bam2fq_no_suf.fastq
+    test-data/sam_spec_padded.bam2fq_singles.fastq
+    test-data/sam_spec_padded.bam2fq_pairs.fastq
 
 
 Licence (MIT)
--- a/tools/samtools_bam2fq/samtools_bam2fq.xml	Tue Nov 04 06:07:53 2014 -0500
+++ b/tools/samtools_bam2fq/samtools_bam2fq.xml	Tue Nov 04 07:11:58 2014 -0500
@@ -1,22 +1,47 @@
-<tool id="samtools_bam2fq" name="Convert BAM to FASTQ" version="0.0.1">
+<tool id="samtools_bam2fq" name="Convert BAM to FASTQ" version="0.0.2">
     <description>samtools bam2fq</description>
     <requirements>
         <requirement type="binary">samtools</requirement>
         <requirement type="package" version="1.1">samtools</requirement>
     </requirements>
     <version_command>samtools 2&gt;&amp;1 | grep -i "Version:"</version_command>
-    <command>samtools bam2fq $suffices $orig_qual "$input_bam" &gt; "$out_fastq"</command>
+    <command>
+        #if $action_mode.mode == "pairs":
+            ## Pair-aware mode: sort by read name, then split pairs (interlaced) from singletons.
+            ## Galaxy has a tendency to automatically apply co-ordinate sorting, so always
+            ## re-sort; if the input was already name sorted this only costs extra IO.
+            ## Note requiring -T is samtools issue 295. Suffix and OQ options apply here too.
+            samtools sort -n -O bam -T TEMP_SORT "$input_bam" | samtools bam2fq $suffices $orig_qual -s "$singletons_fastq" - &gt; "$pairs_fastq"
+        #else
+            ## Naive conversion using order in the input file
+            samtools bam2fq $suffices $orig_qual "$input_bam" &gt; "$out_fastq"
+        #end if
+    </command>
     <inputs>
-        <!-- Unlike other bits of samtools, this seems to autodetect SAM vs BAM -->
-        <param name="input_bam" type="data" format="bam,sam" label="Input BAM file" />
+        <!-- Unlike samtools 0.1.x, samtools 1.1 will autodetect SAM vs BAM -->
+        <param name="input_bam" type="data" format="bam,sam" label="Input SAM/BAM file" />
         <param name="suffices" type="boolean" label="Add /1 and /2 suffices to paired reads?"
 	       truevalue="" falsevalue="-n" checked="true" />
         <param name="orig_qual" type="boolean" label="Use original qualities (OQ tags) if present?"
                truevalue="-O" falsevalue="" checked="false" />
-	<!-- TODO - new option -s in samtools v1.1 -->
+        <!-- Using a condition here to allow different output files; default to paired mode -->
+        <conditional name="action_mode">
+            <param name="mode" type="select" label="Mode of action">
+                <option value="pairs" selected="true">Sort by name, then divide into paired and singletons (two FASTQ files)</option>
+                <option value="naive">No pre-sorting, all reads in a single FASTQ file</option>
+            </param>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="out_fastq" format="fastqsanger" label="$input_bam.name (bam2fq)" />
+        <data name="pairs_fastq" format="fastqsanger" label="$input_bam.name (bam2fq pairs)">
+	      <filter>(action_mode['mode'] == 'pairs')</filter>
+        </data>
+        <data name="singletons_fastq" format="fastqsanger" label="$input_bam.name (bam2fq singletons)">
+              <filter>(action_mode['mode'] == 'pairs')</filter>
+        </data>
+        <data name="out_fastq" format="fastqsanger" label="$input_bam.name (bam2fq)">
+            <filter>(action_mode['mode'] == 'naive')</filter>
+        </data>
     </outputs>
     <stdio>
         <!-- Assume anything other than zero is an error -->
@@ -28,33 +53,59 @@
             <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
             <param name="suffices" value="true" />
             <param name="orig_qual" value="false" />
+            <param name="mode" value="naive" />
             <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
         </test>
         <test>
             <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
             <param name="suffices" value="true" />
             <param name="orig_qual" value="true" />
+            <param name="mode" value="naive" />
             <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
         </test>
         <test>
             <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
+            <param name="mode" value="naive" />
             <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
         </test>
         <test>
             <param name="input_bam" value="sam_spec_padded.depad.bam" ftype="bam" />
+            <param name="mode" value="naive" />
             <output name="out_fastq" file="sam_spec_padded.bam2fq.fastq" ftype="fastqsanger" />
         </test>
         <test>
             <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
             <param name="suffices" value="false"/>
+            <param name="mode" value="naive" />
             <output name="out_fastq" file="sam_spec_padded.bam2fq_no_suf.fastq" ftype="fastqsanger" />
         </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.bam" ftype="bam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="false" />
+            <param name="mode" value="pairs" />
+            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
+            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_bam" value="sam_spec_padded.sam" ftype="sam" />
+            <param name="suffices" value="true" />
+            <param name="orig_qual" value="false" />
+            <param name="mode" value="pairs" />
+            <output name="pairs_fastq" file="sam_spec_padded.bam2fq_pairs.fastq" ftype="fastqsanger" />
+            <output name="singletons_fastq" file="sam_spec_padded.bam2fq_singles.fastq" ftype="fastqsanger" />
+        </test>
     </tests>
     <help>
 **What it does**
 
 This tool runs the ``samtools bam2fq`` command in the SAMtools toolkit.
 
+By default this will pre-sort your SAM/BAM file by read name and split your
+reads into an interlaced FASTQ file for paired reads, and a separate FASTQ
+file for singleton reads. A naive conversion is also offered which gives a
+single FASTQ file with the reads ordered as in the input SAM/BAM file.
+
 It is quite common to wish to remap high-throughput sequencing data. If you
 only have the mapped reads in SAM/BAM format, this tool can "unmap" them to
 recover FASTQ format reads to input into an alternative mapping tool.