changeset 5:5cfa4b6db588 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:13:27 -0400
parents c5a6f28a3e85
children 76231961d33b
files bowtie2_wrapper.xml read_group_macros.xml test-data/bowtie2-test2.bam
diffstat 3 files changed, 502 insertions(+), 138 deletions(-) [+]
line wrap: on
line diff
--- a/bowtie2_wrapper.xml	Thu Dec 04 13:05:09 2014 -0500
+++ b/bowtie2_wrapper.xml	Tue Oct 13 12:13:27 2015 -0400
@@ -1,13 +1,16 @@
-<tool id="bowtie2" name="Bowtie2" version="0.3">
+<tool id="bowtie2" name="Bowtie2" version="0.6">
     <!-- Wrapper compatible with Bowtie version 2.2.4 -->
     <description>- map reads against reference genome</description>
+    <macros>
+        <import>read_group_macros.xml</import>
+    </macros>
     <version_command>bowtie2 --version</version_command>
     <requirements>
         <requirement type="package" version="2.2.4">bowtie2</requirement>
         <requirement type="package" version="0.1.18">samtools</requirement>
     </requirements>
     <command>
-        
+
         ## prepare bowtie2 index
         #set index_path = ''
         #if str($reference_genome.source) == "history":
@@ -17,27 +20,26 @@
         #else:
             #set index_path = $reference_genome.index.fields.path
         #end if
-        
+
         ## execute bowtie2
-        
+
         bowtie2
-        
+
         ## number of threads
         -p \${GALAXY_SLOTS:-4}
 
         ## index file path
         -x $index_path
-        
-        
+
         ## Fastq inputs
         #if str( $library.type ) == "single":
-            -U "${input_1}"
+            -U "${library.input_1}"
             #if str( $library.unaligned_file ) == "true":
                 --un $output_unaligned_reads_l
             #end if
         #elif str( $library.type ) == "paired":
-            -1 "${input_1}"
-            -2 "${input_2}"
+            -1 "${library.input_1}"
+            -2 "${library.input_2}"
             #if str( $library.paired_options.paired_options_selector ) == "yes":
                 -I "${library.paired_options.I}"
                 -X "${library.paired_options.X}"
@@ -69,15 +71,33 @@
                 --un-conc $output_unaligned_reads_l
             #end if
         #end if
-        
-        ## Readgroups
-        #if str( $read_group.read_group_selector ) == "yes":
-            --rg-id "${read_group.rgid}"
-            --rg "SM:${read_group.rgsm}"
-            --rg "LB:${read_group.rglb}"
-            --rg "PL:${read_group.rgpl}"
+
+        ## Read group information.
+        @define_read_group_helpers@
+        #if str( $library.type ) == "single":
+            #set $rg_auto_name = $read_group_name_default($library.input_1)
+        #elif str( $library.type ) == "paired":
+            #set $rg_auto_name = $read_group_name_default($library.input_1, $library.input_2)
+        #else
+            #set $rg_auto_name = $read_group_name_default($library.input_1)
         #end if
-        
+        @set_use_rg_var@
+        @set_read_group_vars@
+        #if $use_rg
+          $format_read_group("", $rg_id, '"', arg='--rg-id ')
+          $format_read_group("SM:", $rg_sm, '"', arg='--rg ')
+          $format_read_group("PL:", $rg_pl, '"', arg='--rg ')
+          $format_read_group("LB:", $rg_lb, '"', arg='--rg ')
+          $format_read_group("CN:", $rg_cn, '"', arg='--rg ')
+          $format_read_group("DS:", $rg_ds, '"', arg='--rg ')
+          $format_read_group("DT:", $rg_dt, '"', arg='--rg ')
+          $format_read_group("FO:", $rg_fo, '"', arg='--rg ')
+          $format_read_group("KS:", $rg_ks, '"', arg='--rg ')
+          $format_read_group("PG:", $rg_pg, '"', arg='--rg ')
+          $format_read_group("PI:", $rg_pi, '"', arg='--rg ')
+          $format_read_group("PU:", $rg_pu, '"', arg='--rg ')
+        #end if
+
         ## Analysis type
         #if ( str( $analysis_type.analysis_type_selector ) == "simple" and str( $analysis_type.presets ) != "no_presets" ):
             $analysis_type.presets
@@ -88,83 +108,87 @@
                 --trim5 "${analysis_type.input_options.trim5}"
                 --trim3 "${analysis_type.input_options.trim3}"
                 ${analysis_type.input_options.qv_encoding}
-                ${analysis_type.input_options.solexa-quals}
-                ${analysis_type.input_options.int-quals}
+                ${analysis_type.input_options.solexa_quals}
+                ${analysis_type.input_options.int_quals}
             #end if
-            
+
             #if str( $analysis_type.alignment_options.alignment_options_selector ) == "yes":
-                -N "${$analysis_type.alignment_options.N}"
-                -L "${$analysis_type.alignment_options.L}"
-                -i "${$analysis_type.alignment_options.i}"
-                --n_ceil "${$analysis_type.alignment_options.n_ceil}"
-                --dpad "${$analysis_type.alignment_options.dpad}"
-                --gbar "${$analysis_type.alignment_options.gbar}"
-                ${analysis_type.alignment_options.ignore-quals}
+                -N "${analysis_type.alignment_options.N}"
+                -L "${analysis_type.alignment_options.L}"
+                -i "${analysis_type.alignment_options.i}"
+                --n-ceil "${analysis_type.alignment_options.n_ceil}"
+                --dpad "${analysis_type.alignment_options.dpad}"
+                --gbar "${analysis_type.alignment_options.gbar}"
+                ${analysis_type.alignment_options.ignore_quals}
                 ${analysis_type.alignment_options.nofw}
                 ${analysis_type.alignment_options.norc}
                 ${analysis_type.alignment_options.no_1mm_upfront}
                 #if str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "end-to-end":
                     --end-to-end
-                    --score-min "${$analysis_type.alignment_options.align_mode.core-min}"
+                    --score-min "${analysis_type.alignment_options.align_mode.score_min_ete}"
                 #elif str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local":
                     --local
-                    --score-min "${$analysis_type.alignment_options.align_mode.core-min}"
+                    --score-min "${analysis_type.alignment_options.align_mode.score_min_loc}"
                 #end if
             #end if
-            
+
             #if str( $analysis_type.scoring_options.scoring_options_selector ) == "yes":
-                --ma "${analysis_type.scoring_options.ma}"
+                #if ( str( $analysis_type.alignment_options.alignment_options_selector ) == "yes" and str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local" ):
+                    --ma "${analysis_type.scoring_options.ma}"
+                #end if
                 --mp "${analysis_type.scoring_options.mp}"
                 --np "${analysis_type.scoring_options.np}"
                 --rdg "${analysis_type.scoring_options.rdg_read_open},${analysis_type.scoring_options.rdg_read_extend}"
                 --rfg "${analysis_type.scoring_options.rfg_ref_open},${analysis_type.scoring_options.rfg_ref_extend}"
             #end if
-            
+
             #if str( $analysis_type.reporting_options.reporting_options_selector ) == "k":
                 -k "${analysis_type.reporting_options.k}"
             #elif str( $analysis_type.reporting_options.reporting_options_selector ) == "a":
                 -a
             #end if
-            
+
             #if str( $analysis_type.effort_options.effort_options_selector ) == "yes":
                 -D "${analysis_type.effort_options.D}"
                 -R "${analysis_type.effort_options.R}"
             #end if
-            
+
             #if str( $analysis_type.sam_options.sam_options_selector ) == "yes":
-                ${analysis_type.sam_options.no-unal}
-                ${analysis_type.sam_options.omit-sec-seq}
+                ${analysis_type.sam_options.no_unal}
+                ${analysis_type.sam_options.omit_sec_seq}
             #end if
-            
+
             #if str( $analysis_type.other_options.other_options_selector ) == "yes":
                 ${analysis_type.other_options.reorder}
-                ${analysis_type.other_options.non-deterministic}
+                ${analysis_type.other_options.non_deterministic}
                 --seed "${analysis_type.other_options.seed}"
             #end if
-        
+
         #elif str( $analysis_type.analysis_type_selector ) == "cline":
             ${analysis_type.cline}
-        #end if    
-        
-        ## view/sort and output BAM file
-        | samtools view -Su - | samtools sort -o - - > $output
-        
+        #end if
+
+        ## output file
+        #if ( str( $analysis_type.analysis_type_selector ) != "full" or str( $analysis_type.sam_opt ) != "true" ):
+          | samtools view -Su - | samtools sort -o - - &gt; $output
+        #else
+          &gt; $output_sam
+        #end if
+
         ## rename unaligned sequence files
         #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r:
-            #set left  = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' )
-            #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' )
-        
-            ; mv $left $output_unaligned_reads_l;
-            mv $right $output_unaligned_reads_r
+            #from os.path import splitext
+            #set _unaligned_root, _unaligned_ext = splitext( str( $output_unaligned_reads_l ) )
+            &amp;&amp; mv "${ _unaligned_root }.1${_unaligned_ext}" "${ output_unaligned_reads_l }"
+            &amp;&amp; mv "${ _unaligned_root }.2${_unaligned_ext}" "${ output_unaligned_reads_r }"
         #end if
-        
+
     </command>
-    
     <!-- basic error handling -->
     <stdio>
         <exit_code range="1:" level="fatal" description="Tool exception" />
     </stdio>
-    
+
     <inputs>
         <!-- single/paired -->
         <conditional name="library">
@@ -175,12 +199,12 @@
             </param>
 
             <when value="single">
-                <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+                <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Must be of datatype &quot;fastqsanger&quot;" />
                 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
             </when>
             <when value="paired">
-                <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
-                <param name="input_2" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <param name="input_1" format="fastqsanger" type="data" label="FASTQ file #1" help="Must be of datatype &quot;fastqsanger&quot;" />
+                <param name="input_2" format="fastqsanger" type="data" label="FASTQ file #2" help="Must be of datatype &quot;fastqsanger&quot;" />
                 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
                 <conditional name="paired_options">
                     <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
@@ -189,17 +213,17 @@
                     </param>
                     <when value="yes">
                         <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins;  E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied).  A 19-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run.  This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
-                        <param name="X" type="integer" value="500" min="0" lable="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Deafult=500"/>
-                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. ` --ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriatefor Illumina's Paired-end Sequencing Assay)">
+                        <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
+                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
                             <option value="--fr" selected="True">--fr</option>
-                            <option value="--rf">--fr</option>
+                            <option value="--rf">--rf</option>
                             <option value="--ff">--ff</option>
                         </param>
                         <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
                         <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
-                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant.  See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/>
-                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
-                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant.  See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
+                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/>
+                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Do not allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. Default=False"/>
+                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Do not allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/>
                     </when>
                     <when value="no">
                         <!-- do nothing -->
@@ -207,7 +231,7 @@
                 </conditional>
             </when>
             <when value="paired_collection">
-                <param name="input_1" format="fastqsanger" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
+                <param name="input_1" format="fastqsanger" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot;" />
                 <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" />
                 <conditional name="paired_options">
                     <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See &quot;Alignment Options&quot; section of Help below for information">
@@ -216,17 +240,17 @@
                     </param>
                     <when value="yes">
                         <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins;  E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied).  A 19-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run.  This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/>
-                        <param name="X" type="integer" value="500" min="0" lable="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Deafult=500"/>
-                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. ` --ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriatefor Illumina's Paired-end Sequencing Assay)">
+                        <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied).  A 61-bp gap would not be valid in that case.  If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/>
+                        <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)">
                             <option value="--fr" selected="True">--fr</option>
-                            <option value="--rf">--fr</option>
+                            <option value="--rf">--rf</option>
                             <option value="--ff">--ff</option>
                         </param>
                         <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/>
                         <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/>
-                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant.  See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/>
-                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
-                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant.  See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/>
+                        <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/>
+                        <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Do not allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. Default=False"/>
+                        <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Do not allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/>
                     </when>
                     <when value="no">
                         <!-- do nothing -->
@@ -255,20 +279,7 @@
         </conditional>
         
         <!-- read group settings -->
-        <conditional name="read_group">
-            <param name="read_group_selector" type="select" label="Specify the read group for this file?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details">
-                <option value="yes">Yes</option>
-                <option value="no" selected="True">No</option>
-            </param>
-            <when value="yes">
-                <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="--rg-id; Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." />
-                <param name="rglb" type="text" size="25" label="Library name (LB)" help="--rg; Required if RG specified" />
-                <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="--rg; Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" />
-                <param name="rgsm" type="text" size="25" label="Sample (SM)" help="--rg; Required if RG specified. Use pool name where a pool is being sequenced" />
-            </when>
-            <when value="no" />
-        </conditional>
-        
+        <expand macro="read_group_conditional" />
         <conditional name="analysis_type">
             <param name="analysis_type_selector" type="select" label="Select analysis mode">
                 <option value="simple">1: Default setting only</option>
@@ -294,16 +305,16 @@
                         <option value="no" selected="true">No</option>
                     </param>
                     <when value="yes">
-                        <param name="skip" type="integer" min="0" value="0" lable="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/>
-                        <param name="qupto" type="integer" min="-1" value="-1" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; default=-1 (no limit)"/>
+                        <param name="skip" type="integer" min="0" value="0" label="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/>
+                        <param name="qupto" type="integer" min="1" value="100000000" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; for default behavior (no limit) leave this value very large"/>
                         <param name="trim5" type="integer" min="0" value="0" label="Trim that many bases from 5' (left) end of each read before alignment" help="-5/--trim5; default=0"/>
                         <param name="trim3" type="integer" min="0" value="0" label="Trim that many bases from 3' (right) end of each read before alignment" help="-3/--trim3; default=0"/>
                         <param name="qv_encoding" type="select" display="radio" label="Select quality score encoding" help="See help below for more details">
-                            <option value="--phred33">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
-                            <option value="--phred64" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
+                            <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
+                            <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
                         </param>
-                        <param name="solexa-quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/>
-                        <param name="int-quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/>
+                        <param name="solexa_quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/>
+                        <param name="int_quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/>
                     </when>
                     <when value="no">
                         <!-- do nothing -->
@@ -316,25 +327,25 @@
                     </param>
                     <when value="yes">
                         <param name="N" type="integer" min="0" max="1" value="0" label="Set the number of mismatches to be allowed in a seed alignment during multiseed alignment (see `Multiseed alignment` section of help below)" help="-N; Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity; default=0"/>
-                        <param name="L" type="integer" min="0" value="20" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more senstive. Default: the `--sensitive` preset is used by default, which sets `-L` to 20 both in `--end-to-end` mode and in `--local` mode"/>
-                        <param name="i" type="text" value="S,1,1.15" size="10" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. See also `Setting function options` below in help section. If the function returns a result less than 1, it is rounded up to 1. Default: the `--sensitive` preset is used by default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75` in `--local` mode"/>
-                        <param name="n_ceil" type="text" value="`L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `-L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length.  See also: [setting function options]. Reads exceeding this ceiling are [filtered out]. Default=`L,0,0.15`"/>
-                        <param name="dpad" type="integer" min="0" value="15" lable="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/>
+                        <param name="L" type="integer" min="0" max="32" value="22" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more sensitive. Default=22"/>
+                        <param name="i" type="text" value="S,1,1.15" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. If the function returns a result less than 1, it is rounded up to 1. Default=`S,1,1.15`"/>
+                        <param name="n_ceil" type="text" value="L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length. Reads exceeding this ceiling are filtered out. Default=`L,0,0.15`"/>
+                        <param name="dpad" type="integer" min="0" value="15" label="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/>
                         <param name="gbar" type="integer" min="0" value="4" label="Disallow gaps within that many positions of the beginning or end of the read" help="--gbar; default=4"/>
-                        <param name="ignore-quals" type="boolean" truevalue="--ignore-quals" falsevalue="" selected="False" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/>
+                        <param name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" checked="False" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/>
                         <param name="nofw" type="boolean" truevalue="--nofw" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the forward (Watson) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
                         <param name="norc" type="boolean" truevalue="--norc" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the reverse (Crick) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/>
-                        <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" selected="False" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help baelow)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the [multiseed heuristic].  Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments.  However, this can lead to unexpected alignments when the user also sets options governing the [multiseed heuristic], like `-L` and `-N`.  For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported.  This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the [multiseed heuristic], which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/>
+                        <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" checked="False" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help below)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the multiseed heuristic.  Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments.  However, this can lead to unexpected alignments when the user also sets options governing the multiseed heuristic, like `-L` and `-N`.  For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported.  This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the multiseed heuristic, which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/>
                         <conditional name="align_mode">
                             <param name="align_mode_selector" type="select" display="radio" label="Select between `--local` and `--end-to-end` alignment modes" help="--local and --end-to-end; see help below for detailed explanation; default=--end-to-end">
                                 <option value="end-to-end" selected="True">End to End (--end-to-end)</option>
                                 <option value="local">Local (--local)</option>
                             </param>
                             <when value="end-to-end">
-                                <param name="score-min" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length.  See also: [setting function options].  The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
+                                <param name="score_min_ete" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
                             </when>
                             <when value="local">
-                                <param name="score-min" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length.  See also: [setting function options].  The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
+                                <param name="score_min_loc" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/>
                             </when>
                         </conditional>
                     </when>
@@ -349,7 +360,7 @@
                     </param>
                     <when value="yes">
                         <param name="ma" type="integer" value="2" label="Set the match bonus" help="--ma;  In `--local` mode match bonus is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in `--end-to-end` mode; Default=2"/>
-                        <param name="mp" type="text" size="10" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`.  If `--ignore-quals` is specified, the number subtracted quals `MX`. Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/>
+                        <param name="mp" type="text" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`.  If `--ignore-quals` is specified, the number subtracted equals `MX`. Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/>
                         <param name="np" type="integer" value="1" label="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as `N`" help="--np; Default=1"/>
                         <param name="rdg_read_open" type="integer" value="5" label="Set the read gap opening penalty" help="--rdg; this is the first component of --rdg flag - opening penalty; Default=5"/>
                         <param name="rdg_read_extend" type="integer" value="3" label="Set the read gap extension penalty" help="--rdg; this is the second component of --rdg flag - extension penalty; Default=3"/>
@@ -369,10 +380,10 @@
                     <when value="no">
                         <!-- do nothing -->
                     </when>
-                    <when value="-k">
-                        <param name="k" type="integer" min="0" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detalied description of this option in the help section below. Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/>
+                    <when value="k">
+                        <param name="k" type="integer" min="1" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detailed description of this option in the help section below. Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/>
                     </when>
-                    <when value="-a">
+                    <when value="a">
                         <!-- do nothing here; set -a flag on the command line-->
                     </when>
                 </conditional>
@@ -382,7 +393,7 @@
                         <option value="no" selected="true">No</option>
                     </param>
                     <when value="yes">
-                        <param name="D" type="integer" value="15" min="0" label="Attemp that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment.  This limit is automatically adjusted up when -k or -a are specified. Default=15"/>
+                        <param name="D" type="integer" value="15" min="0" label="Attempt that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment.  This limit is automatically adjusted up when -k or -a are specified. Default=15"/>
                         <param name="R" type="integer" value="2" min="0" label="Set the maximum number of times Bowtie 2 will `re-seed` reads with repetitive seeds" help="When `re-seeding`, Bowtie 2 simply chooses a new set of reads (same length, same number of mismatches allowed) at different offsets and searches for more alignments.  A read is considered to have repetitive seeds if the total number of seed hits divided by the number of seeds that aligned at least once is greater than 300.  Default=2"/>
                     </when>
                     <when value="no">
@@ -396,8 +407,8 @@
                         <option value="no" selected="true">No</option>
                     </param>
                     <when value="yes">
-                        <param name="no-unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/>
-                        <param name="omit-sec-seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/>
+                        <param name="no_unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/>
+                        <param name="omit_sec_seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/>
                     </when>
                     <when value="no">
                         <!-- do nothing -->
@@ -411,12 +422,13 @@
                     <when value="yes">
                         <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" label="Guarantee that output SAM records are printed in an order corresponding to the order of the reads in the original input file" help="--reorder; Default=False"/>
                         <param name="seed" type="integer" value="0" min="0" label="Use this number as the seed for pseudo-random number generator" help="--seed; Default=0"/>
-                        <param name="non-deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="--non-deterministic; see Help below for explanation of this option; default=False"/>
+                        <param name="non_deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="--non-deterministic; see Help below for explanation of this option; default=False"/>
                     </when>
                     <when value="no">
                         <!-- do nothing -->
                     </when>
                 </conditional>
+                <param name="sam_opt" type="boolean" truevalue="true" falsevalue="false" label="Would you like the output to be a SAM file" help="By default, the output from this Bowtie2 wrapper is a sorted BAM file."/>
             </when>
         </conditional>
     </inputs>
@@ -428,21 +440,45 @@
         <data format="fastqsanger" name="output_unaligned_reads_l" label="${tool.name} on ${on_string}: unaligned reads (L)" >
             <filter>library['unaligned_file'] is True</filter>
             <actions>
-                <action type="format">
-                    <option type="from_param" name="library.input_1" param_attribute="ext" />
-                </action>
+                <conditional name="library.type">
+                    <when value="single">
+                        <action type="format">
+                            <option type="from_param" name="library.input_1" param_attribute="ext" />
+                        </action>
+                    </when>
+                    <when value="paired">
+                        <action type="format">
+                            <option type="from_param" name="library.input_1" param_attribute="ext" />
+                        </action>
+                    </when>
+                    <when value="paired_collection">
+                        <action type="format">
+                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
+                        </action>
+                    </when>
+                </conditional>
             </actions>
         </data>
         <data format="fastqsanger" name="output_unaligned_reads_r" label="${tool.name} on ${on_string}: unaligned reads (R)">
             <filter>( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['unaligned_file'] is True</filter>
             <actions>
-                <action type="format">
-                    <option type="from_param" name="library.input_1" param_attribute="ext" />
-                </action>
+                <conditional name="library.type">
+                    <when value="paired">
+                        <action type="format">
+                            <option type="from_param" name="library.input_2" param_attribute="ext" />
+                        </action>
+                    </when>
+                    <when value="paired_collection">
+                        <action type="format">
+                            <option type="from_param" name="library.input_1" param_attribute="reverse.ext" />
+                        </action>
+                    </when>
+                </conditional>
             </actions>
         </data>
         
-        <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads in BAM format">
+        <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads (sorted BAM)">
+          <filter>analysis_type['analysis_type_selector'] == "simple" or analysis_type['sam_opt'] is False</filter>
           <actions>
             <conditional name="reference_genome.source">
               <when value="indexed">
@@ -461,6 +497,28 @@
             </conditional>
           </actions>
         </data>
+
+        <data format="sam" name="output_sam" label="${tool.name} on ${on_string}: aligned reads (SAM)">
+          <filter>analysis_type['analysis_type_selector'] == "full" and analysis_type['sam_opt'] is True</filter>
+          <actions>
+            <conditional name="reference_genome.source">
+              <when value="indexed">
+                <action type="metadata" name="dbkey">
+                  <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0">
+                    <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                    <filter type="param_value" ref="reference_genome.index" column="0"/>
+                  </option>
+                </action>
+              </when>
+              <when value="history">
+                <action type="metadata" name="dbkey">
+                  <option type="from_param" name="reference_genome.own_file" param_attribute="dbkey" />
+                </action>
+              </when>
+            </conditional>
+          </actions>
+        </data>
+
     </outputs>
 
     <tests>
@@ -477,6 +535,22 @@
             <param name="own_file" value="bowtie2-ref.fasta" />
             <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/>
         </test>
+        <test>
+            <!-- basic test: default ("simple") paired-end run against a history reference, with the read group set explicitly (ID=rg1, PL=CAPILLARY) -->
+            <param name="type" value="paired"/>
+            <param name="selection" value="no"/>
+            <param name="paired_options_selector" value="no"/>
+            <param name="unaligned_file" value="false"/>
+            <param name="analysis_type_selector" value="simple"/>
+            <param name="rg_selector" value="set"/>
+            <param name="ID" value="rg1"/>
+            <param name="PL" value="CAPILLARY"/>
+            <param name="source" value="history" />
+            <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/>
+            <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/>
+            <param name="own_file" value="bowtie2-ref.fasta" />
+            <output name="output" file="bowtie2-test2.bam" ftype="bam" lines_diff="2"/>
+        </test>
     </tests>
 
     <help>
@@ -541,7 +615,7 @@
             pipelines.
 
     --phred64
-            Input qualities are ASCII chars equal to the [Phred quality] plus 64.  This is
+            Input qualities are ASCII chars equal to the Phred quality plus 64.  This is
             also called the "Phred+64" encoding.
 
     --solexa-quals
@@ -551,7 +625,7 @@
 
     --int-quals
             Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`....
-            Integers are treated as being on the [Phred quality] scale unless
+            Integers are treated as being on the Phred quality scale unless
             `--solexa-quals` is also specified. Default: off.
                         
 ------
@@ -591,19 +665,19 @@
 **Alignment options**::
 
     -N &lt;int&gt;
-            Sets the number of mismatches to allowed in a seed alignment during [multiseed
-            alignment].  Can be set to 0 or 1. Setting this higher makes alignment slower
+            Sets the number of mismatches allowed in a seed alignment during multiseed
+            alignment.  Can be set to 0 or 1. Setting this higher makes alignment slower
             (often much slower) but increases sensitivity.  Default: 0.
 
     -L &lt;int&gt;
-            Sets the length of the seed substrings to align during [multiseed alignment].
-            Smaller values make alignment slower but more senstive. Default: the
-            `--sensitive` preset is used by default, which sets `-L` to 20 both in
-            `--end-to-end` mode and in `--local` mode.
+            Sets the length of the seed substrings to align during multiseed alignment.
+            Smaller values make alignment slower but more sensitive. Default: the
+            `--sensitive` preset is used by default, which sets `-L` to 22 in
+            `--end-to-end` mode and to 20 in `--local` mode.
 
     -i &lt;func&gt;
             Sets a function governing the interval between seed substrings to use during
-            [multiseed alignment].  For instance, if the read has 30 characers, and seed
+            multiseed alignment.  For instance, if the read has 30 characters, and seed
             length is 10, and the seed interval is 6, the seeds extracted will be:
 
     Read:      TAGCTACGCTCTACGCTATCATGCATAAAC
@@ -620,7 +694,7 @@
     the interval as a function of the read length, rather than a single
     one-size-fits-all number.  For instance, specifying `-i S,1,2.5` sets the
     interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length.
-    See also: [setting function options]. If the function returns a result less than
+    If the function returns a result less than
     1, it is rounded up to 1. Default: the `--sensitive` preset is used by
     default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75`
     in `--local` mode.
@@ -629,8 +703,8 @@
             Sets a function governing the maximum number of ambiguous characters (usually
             `N`s and/or `.`s) allowed in a read as a function of read length.  For instance,
             specifying `-L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`,
-            where x is the read length.  See also: [setting function options].  Reads
-            exceeding this ceiling are [filtered out].  Default: `L,0,0.15`.
+            where x is the read length.  Reads exceeding this ceiling are filtered out.
+            Default: `L,0,0.15`.
 
     --dpad &lt;int&gt;
             "Pads" dynamic programming problems by `&lt;int&gt;` columns on either side to allow
@@ -658,14 +732,14 @@
 
     --no-1mm-upfront
             By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch
-            end-to-end alignment for the read *before* trying the [multiseed heuristic].  Such
+            end-to-end alignment for the read *before* trying the multiseed heuristic.  Such
             alignments can be found very quickly, and many short read alignments have exact or
             near-exact end-to-end alignments.  However, this can lead to unexpected
-            alignments when the user also sets options governing the [multiseed heuristic],
+            alignments when the user also sets options governing the multiseed heuristic,
             like `-L` and `-N`.  For instance, if the user specifies `-N 0` and `-L` equal
             to the length of the read, the user will be surprised to find 1-mismatch alignments
             reported.  This option prevents Bowtie 2 from searching for 1-mismatch end-to-end
-            alignments before using the [multiseed heuristic], which leads to the expected
+            alignments before using the multiseed heuristic, which leads to the expected
             behavior when combined with options such as `-L` and `-N`.  This comes at the
             expense of speed.
 
@@ -721,8 +795,7 @@
             Sets a function governing the minimum alignment score needed for an alignment to
             be considered "valid" (i.e. good enough to report).  This is a function of read
             length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f`
-            to `f(x) = 0 + -0.6 * x`, where `x` is the read length.  See also: [setting
-            function options].  The default in `--end-to-end` mode is `L,-0.6,-0.6` and
+            to `f(x) = 0 + -0.6 * x`, where `x` is the read length.  The default in `--end-to-end` mode is `L,-0.6,-0.6` and
             the default in `--local` mode is `G,20,8`.
                                         
 -----                                        
@@ -840,18 +913,15 @@
     --dovetail
             If the mates "dovetail", that is if one mate alignment extends past the
             beginning of the other such that the wrong mate begins upstream, consider that
-            to be concordant.  See also: [Mates can overlap, contain or dovetail each
-            other].  Default: mates cannot dovetail in a concordant alignment.
+            to be concordant.  Default: mates cannot dovetail in a concordant alignment.
 
     --no-contain
             If one mate alignment contains the other, consider that to be non-concordant.
-            See also: [Mates can overlap, contain or dovetail each other].  Default: a mate
-            can contain the other in a concordant alignment.
+            Default: a mate can contain the other in a concordant alignment.
 
     --no-overlap
             If one mate alignment overlaps the other at all, consider that to be
-            non-concordant.  See also: [Mates can overlap, contain or dovetail each other]. 
-            Default: mates can overlap in a concordant alignment.
+            non-concordant.  Default: mates can overlap in a concordant alignment.
                                 
 ------
 
@@ -866,9 +936,9 @@
     --rg &lt;text&gt;
             Add `&lt;text&gt;` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the
             `@RG` header line.  Note: in order for the `@RG` line to appear, `--rg-id`
-            must also be specified.  This is because the `ID` tag is required by the [SAM
-            Spec][SAM].  Specify `--rg` multiple times to set multiple fields.  See the
-            [SAM Spec][SAM] for details about what fields are legal.
+            must also be specified.  This is because the `ID` tag is required by the SAM
+            Specification.  Specify `--rg` multiple times to set multiple fields.  See the
+            SAM Specification for details about what fields are legal.
 
     --omit-sec-seq
             When printing secondary alignments, Bowtie 2 by default will write out the `SEQ`
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_group_macros.xml	Tue Oct 13 12:13:27 2015 -0400
@@ -0,0 +1,294 @@
+<macros>
+    <!-- Import this at the top of your command block and then
+         define rg_auto_name. -->
+    <token name="@define_read_group_helpers@">
+#def identifier_or_name($input1)
+    ## Label for a dataset: its collection element identifier when it has one,
+    ## otherwise its name minus a trailing .fastq/.fq extension and/or .gz.
+    ## str.rstrip() strips character sets, not suffixes, so use an anchored regex.
+    #import re
+    #return $input1.element_identifier if hasattr($input1, 'element_identifier') else re.sub(r'(?:\.(?:fastq|fq))?(?:\.gz)?$', '', $input1.name)
+#end def
+
+#def clean(name)
+    #import re
+    #set $name_clean = re.sub('[^\w\-_\.]', '_', $name)
+    #return $name_clean
+#end def
+
+#def read_group_name_default($input1, $input2=None)
+    ## Default read group name: the cleaned name of a single input or, for a pair, the
+    ## names' common prefix when it is longer than 3 characters (else input 1's name).
+    #import os
+    #set $input_name1 = $clean($identifier_or_name($input1))
+    #if $input2 is None
+        #return $input_name1
+    #end if
+    #set $input_name2 = $clean($identifier_or_name($input2))
+    #set $common_prefix = os.path.commonprefix([$input_name1, $input_name2])
+    #if len($common_prefix) > 3
+        #return $common_prefix
+    #end if
+    #return $input_name1
+#end def
+
+#def format_read_group(prefix, value, quote='', arg='')
+    #if $value
+        #return $arg + $quote + $prefix + $value + $quote
+    #else
+        #return ''
+    #end if
+#end def
+
+#def rg_param(name)
+    #if $varExists("rg")
+        #return $rg.get($name, None)
+    #else
+        #return $getVar($name, None)
+    #end if
+#end def
+
+#set $use_rg = True
+    </token>
+    <!-- Preconditions: use_rg and rg_auto_name have been
+         defined.
+    -->
+    <token name="@set_read_group_vars@">
+#if $use_rg
+    #if $rg_param('read_group_id_conditional') is None
+        #set $rg_id = $rg_auto_name
+    #elif $rg_param('read_group_id_conditional').do_auto_name
+        #set $rg_id = $rg_auto_name
+    #else
+        #set $rg_id = str($rg_param('read_group_id_conditional').ID)
+    #end if
+
+    #if $rg_param('read_group_sm_conditional') is None
+        #set $rg_sm = ''
+    #elif $rg_param('read_group_sm_conditional').do_auto_name
+        #set $rg_sm = $rg_auto_name
+    #else
+        #set $rg_sm = str($rg_param('read_group_sm_conditional').SM)
+    #end if
+
+    #if $rg_param('PL')
+        #set $rg_pl = str($rg_param('PL'))
+    #else
+        #set $rg_pl = ''
+    #end if
+
+    #if $rg_param('read_group_lb_conditional') is None
+        #set $rg_lb = ''
+    #elif $rg_param('read_group_lb_conditional').do_auto_name
+        #set $rg_lb = $rg_auto_name
+    #else
+        #set $rg_lb = str($rg_param('read_group_lb_conditional').LB)
+    #end if
+
+    #if $rg_param('CN')
+        #set $rg_cn = str($rg_param('CN'))
+    #else
+        #set $rg_cn = ''
+    #end if
+
+    #if $rg_param("DS")
+        #set $rg_ds = str($rg_param("DS"))
+    #else
+        #set $rg_ds = ''
+    #end if
+
+    #if $rg_param("DT")
+        #set $rg_dt = str($rg_param("DT"))
+    #else
+        #set $rg_dt = ''
+    #end if
+
+    #if $rg_param("FO")
+        #set $rg_fo = str($rg_param("FO"))
+    #else
+        #set $rg_fo = ''
+    #end if
+
+    #if $rg_param("KS")
+        #set $rg_ks = str($rg_param("KS"))
+    #else
+        #set $rg_ks = ''
+    #end if
+
+    #if $rg_param("PG")
+        #set $rg_pg = str($rg_param("PG"))
+    #else
+        #set $rg_pg = ''
+    #end if
+
+    ## rg_param returns None when PI is not defined; str(None) is 'None' (truthy), so test for None explicitly.
+    #set $rg_pi = ''
+    #if $rg_param("PI") is not None
+        #set $rg_pi = str($rg_param("PI"))
+    #end if
+
+    #if $rg_param("PU")
+        #set $rg_pu = str($rg_param("PU"))
+    #else
+        #set $rg_pu = ''
+    #end if
+#end if
+    </token>
+    <token name="@set_use_rg_var@">
+#set $use_rg = str($rg.rg_selector) != "do_not_set"
+    </token>
+    <xml name="read_group_auto_name_conditional">
+        <param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this value" checked="no" />
+        <when value="true">
+        </when>
+        <when value="false">
+            <yield />
+        </when>
+    </xml>
+    <xml name="read_group_id_param">
+        <param name="ID" type="text" value="" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment" optional="false">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_id_conditional">
+        <conditional name="read_group_id_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_id_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_sm_param">
+        <param name="SM" type="text" value="" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" />
+    </xml>
+    <xml name="read_group_sm_conditional">
+        <conditional name="read_group_sm_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_sm_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <!-- The SM param above is optional (as in the SAM/BAM spec); the
+         variant below makes it required, as Picard expects.
+    -->
+    <xml name="read_group_sm_param_required">
+        <param name="SM" type="text" value="" label="Read group sample name (SM)" optional="false" help="This value should be descriptive. Use pool name where a pool is being sequenced">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_sm_required_conditional">
+        <conditional name="read_group_sm_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_sm_param_required" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_pl_param">
+        <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)">
+            <option value="CAPILLARY">CAPILLARY</option>
+            <option value="LS454">LS454</option>
+            <option selected="True" value="ILLUMINA">ILLUMINA</option>
+            <option value="SOLID">SOLID</option>
+            <option value="HELICOS">HELICOS</option>
+            <option value="IONTORRENT">IONTORRENT</option>
+            <option value="PACBIO">PACBIO</option>
+        </param>
+    </xml>
+    <xml name="read_group_lb_param">
+        <param name="LB" type="text" label="Library name (LB)" optional="true" />
+    </xml>
+    <xml name="read_group_lb_conditional">
+        <conditional name="read_group_lb_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_lb_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_lb_required_param">
+        <param name="LB" type="text" label="Library name (LB)" optional="false">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_lb_required_conditional">
+        <conditional name="read_group_lb_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_lb_required_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_cn_param">
+        <param name="CN" type="text" label="Sequencing center that produced the read (CN)" />
+    </xml>
+    <xml name="read_group_ds_param">
+        <param name="DS" type="text" label="Description (DS)" />
+    </xml>
+    <xml name="read_group_dt_param">
+        <param name="DT" type="text" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />
+    </xml>
+    <xml name="read_group_fo_param">
+        <param name="FO" type="text" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\*|[ACMGRSVTWYHKDBN]+/">
+          <validator type="regex" message="Invalid flow order">^(\*|[ACMGRSVTWYHKDBN]+)$</validator>
+        </param>
+    </xml>
+    <xml name="read_group_ks_param">
+        <param name="KS" type="text" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />
+    </xml>
+    <xml name="read_group_pg_param">
+        <param name="PG" type="text" label="Programs used for processing the read group (PG)" />
+    </xml>
+    <xml name="read_group_pi_param">
+        <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />
+    </xml>
+    <xml name="read_group_pu_param">
+        <param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" />
+    </xml>
+    <xml name="read_group_pu_required_param">
+        <param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" />
+    </xml>
+    <!-- Only ID is required - all groups available -->
+    <xml name="read_group_inputs_spec">
+        <expand macro="read_group_id_conditional" />
+        <expand macro="read_group_sm_conditional" />
+        <expand macro="read_group_pl_param" />
+        <expand macro="read_group_lb_conditional" />
+        <expand macro="read_group_cn_param" />
+        <expand macro="read_group_ds_param" />
+        <expand macro="read_group_dt_param" />
+        <expand macro="read_group_fo_param" />
+        <expand macro="read_group_ks_param" />
+        <expand macro="read_group_pg_param" />
+        <expand macro="read_group_pi_param" />
+        <expand macro="read_group_pu_param" />
+    </xml>
+    <!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. -->
+    <xml name="read_group_inputs_picard">
+        <expand macro="read_group_id_conditional" />
+        <expand macro="read_group_sm_required_conditional" />
+        <expand macro="read_group_lb_required_conditional" />
+        <expand macro="read_group_pl_param" />
+        <expand macro="read_group_pu_required_param" />
+        <expand macro="read_group_cn_param" />
+        <expand macro="read_group_ds_param" />
+        <expand macro="read_group_pi_param" />
+        <expand macro="read_group_dt_param" />
+    </xml>
+    <xml name="read_group_conditional">
+        <conditional name="rg">
+            <param name="rg_selector" type="select" label="Set read groups information?" help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets.">
+                <option value="set">Set read groups (SAM/BAM specification)</option>
+                <option value="set_picard">Set read groups (Picard style)</option>
+                <option value="set_id_auto">Automatically assign ID</option>
+                <option value="do_not_set" selected="True">Do not set</option>
+            </param>
+            <when value="set_picard">
+                <expand macro="read_group_inputs_picard" />
+            </when>
+            <when value="set">
+                <expand macro="read_group_inputs_spec" />
+            </when>
+            <when value="set_id_auto">
+            </when>
+            <when value="do_not_set">
+            </when>
+        </conditional>
+    </xml>
+</macros>
Binary file test-data/bowtie2-test2.bam has changed