diff bowtie2_wrapper.xml @ 15:7e0b333f39e1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bowtie2 commit cf554b9b69c32acb484c34fdc60384fa49c7c482
author iuc
date Thu, 01 Jun 2017 06:46:49 -0400
parents 85f0e9edb32d
children def46fdb3909
line wrap: on
line diff
--- a/bowtie2_wrapper.xml	Wed Apr 12 17:09:24 2017 -0400
+++ b/bowtie2_wrapper.xml	Thu Jun 01 06:46:49 2017 -0400
@@ -1,10 +1,10 @@
-<tool id="bowtie2" name="Bowtie2" version="2.3.0.1" profile="17.01">
+<tool id="bowtie2" name="Bowtie2" version="2.3.2.1" profile="17.01">
     <description>- map reads against reference genome</description>
     <macros>
-        <import>read_group_macros.xml</import>
+        <import>bowtie2_macros.xml</import>
     </macros>
     <requirements>
-        <requirement type="package" version="2.3.0">bowtie2</requirement>
+        <requirement type="package" version="2.3.2">bowtie2</requirement>
         <requirement type="package" version="1.3.1">samtools</requirement>
     </requirements>
     <version_command>bowtie2 --version</version_command>
@@ -66,6 +66,18 @@
                 #set read2 = "input_r.fastq"
             #end if
             ln -s '${library.input_1.reverse}' ${read2} &&
+
+        #else if str($library.type) == 'paired_interleaved':
+            #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
+                #set read1 = "input_il.fastq.gz"
+                #set compressed = "GZ"
+            #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+                #set read1 = "input_il.fastq.bz2"
+                #set compressed = "BZ2"
+            #else:
+                #set read1 = "input_il.fastq"
+            #end if
+            ln -s '${library.input_1}' ${read1} &&
         #else:
             #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
                 #set read1 = "input_f.fastq.gz"
@@ -110,6 +122,27 @@
                     --al '${output_aligned_reads_l}'
                 #end if
             #end if
+
+        #elif str( $library.type ) == "paired_interleaved":
+            --interleaved '${read1}'
+            #if str( $library.unaligned_file ) == "true":
+                #if $compressed == "GZ":
+                    --un-gz '${output_unaligned_reads_l}'
+                #else if $compressed == "BZ2":
+                    --un-bz2 '${output_unaligned_reads_l}'
+                #else:
+                    --un '${output_unaligned_reads_l}'
+                #end if
+            #end if
+            #if str( $library.aligned_file ) == "true":
+                #if $compressed == "GZ":
+                    --al-gz '${output_aligned_reads_l}'
+                #else if $compressed == "BZ2":
+                    --al-bz2 '${output_aligned_reads_l}'
+                #else:
+                    --al '${output_aligned_reads_l}'
+                #end if
+            #end if
         #else:
             -1 '${read1}'
             -2 '${read2}'
@@ -273,69 +306,36 @@
               <option value="single">Single-end</option>
               <option value="paired">Paired-end</option>
               <option value="paired_collection">Paired-end Dataset Collection</option>
+              <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
             </param>
 
             <when value="single">
                 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="FASTQ file" help="Must be of datatype &quot;fastqsanger&quot;" />
-                <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc (possibly with -gz or -bz2); This triggers --un parameter for single reads and --un-conc for paired reads" />
-                <param name="aligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write aligned reads (in fastq format) to separate file(s)" help="--al/--al-conc (possibly with -gz or -bz2); This triggers --al parameter for single reads and --al-conc for paired reads" />
+
+                <expand macro="align_unalign" />
+
             </when>
             <when value="paired">
                 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="FASTQ file #1" help="Must be of datatype &quot;fastqsanger&quot;" />
                 <param name="input_2" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="FASTQ file #2" help="Must be of datatype &quot;fastqsanger&quot;" />
-                <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc (possibly with -gz or -bz2); This triggers --un parameter for single reads and --un-conc for paired reads" />
-                <param name="aligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write aligned reads (in fastq format) to separate file(s)" help="--al/--al-conc (possibly with -gz or -bz2); This triggers --al parameter for single reads and --al-conc for paired reads" />
-                <conditional name="paired_options">
-                    <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
-                        <option value="no" selected="True">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="yes">
-                        <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins;  E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied).  A 19-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run.  This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
-                        <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
-                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
-                            <option value="--fr" selected="True">--fr</option>
-                            <option value="--rf">--rf</option>
-                            <option value="--ff">--ff</option>
-                        </param>
-                        <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
-                        <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
-                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/>
-                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Disallow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. Default=False"/>
-                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Disallow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/>
-                    </when>
-                    <when value="no">
-                        <!-- do nothing -->
-                    </when>
-                </conditional>
+
+                <expand macro="align_unalign" />
+                <expand macro="paired_end_options" />
+
             </when>
             <when value="paired_collection">
                 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot;" />
-                <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc (possibly with -gz or -bz2); This triggers --un parameter for single reads and --un-conc for paired reads" />
-                <param name="aligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write aligned reads (in fastq format) to separate file(s)" help="--al/--al-conc (possibly with -gz or -bz2); This triggers --al parameter for single reads and --al-conc for paired reads" />
-                <conditional name="paired_options">
-                    <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
-                        <option value="no" selected="True">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="yes">
-                        <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins;  E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied).  A 19-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run.  This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
-                        <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
-                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
-                            <option value="--fr" selected="True">--fr</option>
-                            <option value="--rf">--rf</option>
-                            <option value="--ff">--ff</option>
-                        </param>
-                        <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
-                        <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
-                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/>
-                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Disallow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. Default=False"/>
-                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Disallow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/>
-                    </when>
-                    <when value="no">
-                        <!-- do nothing -->
-                    </when>
-                </conditional>
+
+                <expand macro="align_unalign" />
+                <expand macro="paired_end_options" />
+
+            </when>
+            <when value="paired_interleaved">
+                <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot;. --interleaved"/>
+                <!-- one dataset holds both mates; expands the same two macros as the "paired" and "paired_collection" cases -->
+                <expand macro="align_unalign" />
+                <expand macro="paired_end_options" />
+
             </when>
         </conditional>
 
@@ -648,7 +648,6 @@
         <test>
             <!-- basic test on single paired default run -->
             <param name="type" value="paired"/>
-            <param name="selection" value="no"/>
             <param name="paired_options_selector" value="no"/>
             <param name="unaligned_file" value="false"/>
             <param name="analysis_type_selector" value="simple"/>
@@ -661,7 +660,6 @@
         <test>
             <!-- basic test on single paired default run -->
             <param name="type" value="paired"/>
-            <param name="selection" value="no"/>
             <param name="paired_options_selector" value="no"/>
             <param name="unaligned_file" value="false"/>
             <param name="analysis_type_selector" value="simple"/>
@@ -677,7 +675,6 @@
         <test>
             <!-- basic test on single paired default run with stats-->
             <param name="type" value="paired"/>
-            <param name="selection" value="no"/>
             <param name="paired_options_selector" value="no"/>
             <param name="unaligned_file" value="false"/>
             <param name="analysis_type_selector" value="simple"/>
@@ -687,12 +684,29 @@
             <param name="own_file" value="bowtie2-ref.fasta" />
             <param name="save_mapping_stats" value="true" />
             <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
-            <output name="mapping_stats" file="bowtie2-stats.out" ftype="txt"/>
+            <output name="mapping_stats">
+                <assert_contents>
+                    <has_text text="of these" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <!-- basic test on interleaved paired default run: both mates are read from one uncompressed fastqsanger dataset -->
+            <param name="type" value="paired_interleaved"/>
+            <!-- <param name="paired_options_selector" value="no"/> -->
+            <param name="unaligned_file" value="false"/>
+            <param name="analysis_type_selector" value="simple"/>
+            <param name="rg_selector" value="set"/>
+            <param name="ID" value="rg1"/>
+            <param name="PL" value="CAPILLARY"/>
+            <param name="source" value="history" />
+            <param name="input_1" value="bowtie2-fq_il.fq" ftype="fastqsanger"/>
+            <param name="own_file" value="bowtie2-ref.fasta" />
+            <output name="output" file="bowtie2-test_il.bam" ftype="bam" lines_diff="2"/>
         </test>
         <test>
             <!-- test fastqsanger.gz input -->
             <param name="type" value="paired"/>
-            <param name="selection" value="no"/>
             <param name="paired_options_selector" value="no"/>
             <param name="unaligned_file" value="false"/>
             <param name="analysis_type_selector" value="simple"/>
@@ -705,7 +719,6 @@
         <test>
             <!-- test fastqsanger.bz2 input -->
             <param name="type" value="paired"/>
-            <param name="selection" value="no"/>
             <param name="paired_options_selector" value="no"/>
             <param name="unaligned_file" value="false"/>
             <param name="analysis_type_selector" value="simple"/>
@@ -754,12 +767,15 @@
 
 **Inputs**
 
-Bowtie 2 accepts files in Sanger FASTQ format (single or pair-end). Use the FASTQ Groomer to prepare your files.
+Bowtie 2 accepts files in Sanger FASTQ format (single or paired-end). Paired-end data can be represented as two individual (forward and reverse) datasets, as well as a single interleaved dataset (see an example at the end of the help section).
 
 ------
 
 **Input options**::
 
+    --interleaved
+            Reads interleaved FASTQ files where the first two records (8 lines) represent a mate pair.
+
     -s/--skip <int>
             Skip (i.e. do not align) the first `<int>` reads or pairs in the input.
 
@@ -1137,6 +1153,57 @@
             but might be more appropriate in situations where the input consists of many
             identical reads.
 
+-----
+
+
+**Paired-end (and mate-pair) data in fastq format**
+
+Paired-end datasets can be represented as two individual datasets:
+
+First dataset::
+
+ @1/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
+ @2/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG
+
+Second dataset::
+
+ @1/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
+ @2/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH
+
+Or a single *interleaved* dataset::
+
+ @1/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
+ @1/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
+ @2/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG
+ @2/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH
+
+
+
+
     ]]></help>
     <citations>
     <citation type="doi">10.1186/gb-2009-10-3-r25</citation>