Mercurial > repos > devteam > bowtie2
changeset 5:5cfa4b6db588 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author | devteam |
---|---|
date | Tue, 13 Oct 2015 12:13:27 -0400 |
parents | c5a6f28a3e85 |
children | 76231961d33b |
files | bowtie2_wrapper.xml read_group_macros.xml test-data/bowtie2-test2.bam |
diffstat | 3 files changed, 502 insertions(+), 138 deletions(-) [+] |
line wrap: on
line diff
--- a/bowtie2_wrapper.xml Thu Dec 04 13:05:09 2014 -0500 +++ b/bowtie2_wrapper.xml Tue Oct 13 12:13:27 2015 -0400 @@ -1,13 +1,16 @@ -<tool id="bowtie2" name="Bowtie2" version="0.3"> +<tool id="bowtie2" name="Bowtie2" version="0.6"> <!-- Wrapper compatible with Bowtie version 2.2.4 --> <description>- map reads against reference genome</description> + <macros> + <import>read_group_macros.xml</import> + </macros> <version_command>bowtie2 --version</version_command> <requirements> <requirement type="package" version="2.2.4">bowtie2</requirement> <requirement type="package" version="0.1.18">samtools</requirement> </requirements> <command> - + ## prepare bowtie2 index #set index_path = '' #if str($reference_genome.source) == "history": @@ -17,27 +20,26 @@ #else: #set index_path = $reference_genome.index.fields.path #end if - + ## execute bowtie2 - + bowtie2 - + ## number of threads -p \${GALAXY_SLOTS:-4} ## index file path -x $index_path - - + ## Fastq inputs #if str( $library.type ) == "single": - -U "${input_1}" + -U "${library.input_1}" #if str( $library.unaligned_file ) == "true": --un $output_unaligned_reads_l #end if #elif str( $library.type ) == "paired": - -1 "${input_1}" - -2 "${input_2}" + -1 "${library.input_1}" + -2 "${library.input_2}" #if str( $library.paired_options.paired_options_selector ) == "yes": -I "${library.paired_options.I}" -X "${library.paired_options.X}" @@ -69,15 +71,33 @@ --un-conc $output_unaligned_reads_l #end if #end if - - ## Readgroups - #if str( $read_group.read_group_selector ) == "yes": - --rg-id "${read_group.rgid}" - --rg "SM:${read_group.rgsm}" - --rg "LB:${read_group.rglb}" - --rg "PL:${read_group.rgpl}" + + ## Read group information. 
+ @define_read_group_helpers@ + #if str( $library.type ) == "single": + #set $rg_auto_name = $read_group_name_default($library.input_1) + #elif str( $library.type ) == "paired": + #set $rg_auto_name = $read_group_name_default($library.input_1, $library.input_2) + #else + #set $rg_auto_name = $read_group_name_default($library.input_1) #end if - + @set_use_rg_var@ + @set_read_group_vars@ + #if $use_rg + $format_read_group("", $rg_id, '"', arg='--rg-id ') + $format_read_group("SM:", $rg_sm, '"', arg='--rg ') + $format_read_group("PL:", $rg_pl, '"', arg='--rg ') + $format_read_group("LB:", $rg_lb, '"', arg='--rg ') + $format_read_group("CN:", $rg_cn, '"', arg='--rg ') + $format_read_group("DS:", $rg_ds, '"', arg='--rg ') + $format_read_group("DT:", $rg_dt, '"', arg='--rg ') + $format_read_group("FO:", $rg_fo, '"', arg='--rg ') + $format_read_group("KS:", $rg_ks, '"', arg='--rg ') + $format_read_group("PG:", $rg_pg, '"', arg='--rg ') + $format_read_group("PI:", $rg_pi, '"', arg='--rg ') + $format_read_group("PU:", $rg_pu, '"', arg='--rg ') + #end if + ## Analysis type #if ( str( $analysis_type.analysis_type_selector ) == "simple" and str( $analysis_type.presets ) != "no_presets" ): $analysis_type.presets @@ -88,83 +108,87 @@ --trim5 "${analysis_type.input_options.trim5}" --trim3 "${analysis_type.input_options.trim3}" ${analysis_type.input_options.qv_encoding} - ${analysis_type.input_options.solexa-quals} - ${analysis_type.input_options.int-quals} + ${analysis_type.input_options.solexa_quals} + ${analysis_type.input_options.int_quals} #end if - + #if str( $analysis_type.alignment_options.alignment_options_selector ) == "yes": - -N "${$analysis_type.alignment_options.N}" - -L "${$analysis_type.alignment_options.L}" - -i "${$analysis_type.alignment_options.i}" - --n_ceil "${$analysis_type.alignment_options.n_ceil}" - --dpad "${$analysis_type.alignment_options.dpad}" - --gbar "${$analysis_type.alignment_options.gbar}" - ${analysis_type.alignment_options.ignore-quals} + -N 
"${analysis_type.alignment_options.N}" + -L "${analysis_type.alignment_options.L}" + -i "${analysis_type.alignment_options.i}" + --n-ceil "${analysis_type.alignment_options.n_ceil}" + --dpad "${analysis_type.alignment_options.dpad}" + --gbar "${analysis_type.alignment_options.gbar}" + ${analysis_type.alignment_options.ignore_quals} ${analysis_type.alignment_options.nofw} ${analysis_type.alignment_options.norc} ${analysis_type.alignment_options.no_1mm_upfront} #if str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "end-to-end": --end-to-end - --score-min "${$analysis_type.alignment_options.align_mode.core-min}" + --score-min "${analysis_type.alignment_options.align_mode.score_min_ete}" #elif str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local": --local - --score-min "${$analysis_type.alignment_options.align_mode.core-min}" + --score-min "${analysis_type.alignment_options.align_mode.score_min_loc}" #end if #end if - + #if str( $analysis_type.scoring_options.scoring_options_selector ) == "yes": - --ma "${analysis_type.scoring_options.ma}" + #if ( str( $analysis_type.alignment_options.alignment_options_selector ) == "yes" and str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local" ): + --ma "${analysis_type.scoring_options.ma}" + #end if --mp "${analysis_type.scoring_options.mp}" --np "${analysis_type.scoring_options.np}" --rdg "${analysis_type.scoring_options.rdg_read_open},${analysis_type.scoring_options.rdg_read_extend}" --rfg "${analysis_type.scoring_options.rfg_ref_open},${analysis_type.scoring_options.rfg_ref_extend}" #end if - + #if str( $analysis_type.reporting_options.reporting_options_selector ) == "k": -k "${analysis_type.reporting_options.k}" #elif str( $analysis_type.reporting_options.reporting_options_selector ) == "a": -a #end if - + #if str( $analysis_type.effort_options.effort_options_selector ) == "yes": -D "${analysis_type.effort_options.D}" -R 
"${analysis_type.effort_options.R}" #end if - + #if str( $analysis_type.sam_options.sam_options_selector ) == "yes": - ${analysis_type.sam_options.no-unal} - ${analysis_type.sam_options.omit-sec-seq} + ${analysis_type.sam_options.no_unal} + ${analysis_type.sam_options.omit_sec_seq} #end if - + #if str( $analysis_type.other_options.other_options_selector ) == "yes": ${analysis_type.other_options.reorder} - ${analysis_type.other_options.non-deterministic} + ${analysis_type.other_options.non_deterministic} --seed "${analysis_type.other_options.seed}" #end if - + #elif str( $analysis_type.analysis_type_selector ) == "cline": ${analysis_type.cline} - #end if - - ## view/sort and output BAM file - | samtools view -Su - | samtools sort -o - - > $output - + #end if + + ## output file + #if ( str( $analysis_type.analysis_type_selector ) != "full" or str( $analysis_type.sam_opt ) != "true" ): + | samtools view -Su - | samtools sort -o - - > $output + #else + > $output_sam + #end if + ## rename unaligned sequence files #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r: - #set left = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' ) - #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' ) - - ; mv $left $output_unaligned_reads_l; - mv $right $output_unaligned_reads_r + #from os.path import splitext + #set _unaligned_root, _unaligned_ext = splitext( str( $output_unaligned_reads_l ) ) + && mv "${ _unaligned_root }.1${_unaligned_ext}" "${ output_unaligned_reads_l }" + && mv "${ _unaligned_root }.2${_unaligned_ext}" "${ output_unaligned_reads_r }" #end if - + </command> - <!-- basic error handling --> <stdio> <exit_code range="1:" level="fatal" description="Tool exception" /> </stdio> - + <inputs> <!-- single/paired --> <conditional name="library"> @@ -175,12 +199,12 @@ </param> <when value="single"> - <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have 
Sanger-scaled quality values with ASCII offset 33"/> + <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Must be of datatype "fastqsanger"" /> <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" /> </when> <when value="paired"> - <param name="input_1" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> - <param name="input_2" format="fastqsanger" type="data" label="FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param name="input_1" format="fastqsanger" type="data" label="FASTQ file #1" help="Must be of datatype "fastqsanger"" /> + <param name="input_2" format="fastqsanger" type="data" label="FASTQ file #2" help="Must be of datatype "fastqsanger"" /> <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" /> <conditional name="paired_options"> <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See "Alignment Options" section of Help below for information"> @@ -189,17 +213,17 @@ </param> <when value="yes"> <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins; E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied). A 19-bp gap would not be valid in that case. 
If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run. This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/> - <param name="X" type="integer" value="500" min="0" lable="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Deafult=500"/> - <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. ` --ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriatefor Illumina's Paired-end Sequencing Assay)"> + <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. 
if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/> + <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)"> <option value="--fr" selected="True">--fr</option> - <option value="--rf">--fr</option> + <option value="--rf">--rf</option> <option value="--ff">--ff</option> </param> <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/> <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. 
A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/> - <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/> - <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/> - <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/> + <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/> + <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. 
Default=False"/> + <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/> </when> <when value="no"> <!-- do nothing --> @@ -207,7 +231,7 @@ </conditional> </when> <when value="paired_collection"> - <param name="input_1" format="fastqsanger" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <param name="input_1" format="fastqsanger" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype "fastqsanger"" /> <param name="unaligned_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="--un/--un-conc; This triggers --un parameter for single reads and --un-conc for paired reads" /> <conditional name="paired_options"> <param name="paired_options_selector" type="select" label="Do you want to set paired-end options?" help="See "Alignment Options" section of Help below for information"> @@ -216,17 +240,17 @@ </param> <when value="yes"> <param name="I" type="integer" value="0" min="0" label="Set the minimum fragment length for valid paired-end alignments" help="-I/--minins; E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied). A 19-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates. The larger the difference between `-I` and `-X`, the slower Bowtie 2 will run. 
This is because larger differences bewteen `-I` and `-X` require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient. Default=0"/> - <param name="X" type="integer" value="500" min="0" lable="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Deafult=500"/> - <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. ` --ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriatefor Illumina's Paired-end Sequencing Assay)"> + <param name="X" type="integer" value="500" min="0" label="Set the maximum fragment length for valid paired-end alignments" help="-X/--maxins; E.g. 
if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates; Default=500"/> + <param name="fr_rf_ff" type="select" display="radio" label="Select the upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand" help="--fr, --rf, or --ff; E.g., if `--fr` is specified and there is a candidate paired-end alignment where mate 1 appears upstream of the reverse complement of mate 2 and the fragment length constraints (`-I` and `-X`) are met, that alignment is valid. Also, if mate 2 appears upstream of the reverse complement of mate 1 and all other constraints are met, that too is valid. `--rf` likewise requires that an upstream mate1 be reverse-complemented and a downstream mate2 be forward-oriented. `--ff` requires both an upstream mate 1 and a downstream mate 2 to be forward-oriented; Default=--fr (appropriate for Illumina's Paired-end Sequencing Assay)"> <option value="--fr" selected="True">--fr</option> - <option value="--rf">--fr</option> + <option value="--rf">--rf</option> <option value="--ff">--ff</option> </param> <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable no-mixed behavior" help="--no-mixed; By default, when `bowtie2` cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates; default=False"/> <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable no-discordant behavior" help="--no-discordant; By default, `bowtie2` looks for discordant alignments if it cannot find any concordant alignments. 
A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (`--fr`/`--rf`/`--ff`, `-I`, `-X`); default=False"/> - <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. See also: `Mates can overlap, contain or dovetail each other` in help section below; default=False"/> - <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/> - <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. See also: `Mates can overlap, contain or dovetail each other` in help section; default=False"/> + <param name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" checked="False" label="Allow mate dovetailing" help="--dovetail; If the mates `dovetail`, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant. Default=False"/> + <param name="no_contain" type="boolean" truevalue="--no-contain" falsevalue="" checked="False" label="Allow one mate alignment to contain another" help="--no-contain; If one mate alignment contains the other, consider that to be non-concordant. 
Default=False"/> + <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="Allow mate alignments to overlap" help="--no-overlap; If one mate alignment overlaps the other at all, consider that to be non-concordant. Default=False"/> </when> <when value="no"> <!-- do nothing --> @@ -255,20 +279,7 @@ </conditional> <!-- read group settings --> - <conditional name="read_group"> - <param name="read_group_selector" type="select" label="Specify the read group for this file?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. See help below for more details"> - <option value="yes">Yes</option> - <option value="no" selected="True">No</option> - </param> - <when value="yes"> - <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="--rg-id; Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." /> - <param name="rglb" type="text" size="25" label="Library name (LB)" help="--rg; Required if RG specified" /> - <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="--rg; Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" /> - <param name="rgsm" type="text" size="25" label="Sample (SM)" help="--rg; Required if RG specified. 
Use pool name where a pool is being sequenced" /> - </when> - <when value="no" /> - </conditional> - + <expand macro="read_group_conditional" /> <conditional name="analysis_type"> <param name="analysis_type_selector" type="select" label="Select analysis mode"> <option value="simple">1: Default setting only</option> @@ -294,16 +305,16 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> - <param name="skip" type="integer" min="0" value="0" lable="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/> - <param name="qupto" type="integer" min="-1" value="-1" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; default=-1 (no limit)"/> + <param name="skip" type="integer" min="0" value="0" label="Skip (i.e. do not align) the first that many reads or pairs in the input" help="-s/--skip; default=0"/> + <param name="qupto" type="integer" min="1" value="100000000" label="Align the first that many reads or read pairs from the input (after the -s/--skip reads or pairs have been skipped), then stop" help="-u/--qupto; for default behavior (no limit) leave this value very large"/> <param name="trim5" type="integer" min="0" value="0" label="Trim that many bases from 5' (left) end of each read before alignment" help="-5/--trim5; default=0"/> <param name="trim3" type="integer" min="0" value="0" label="Trim that many bases from 3' (right) end of each read before alignment" help="-3/--trim3; default=0"/> <param name="qv_encoding" type="select" display="radio" label="Select quality score encoding" help="See help below for more details"> - <option value="--phred33">Input qualities are ASCII chars equal to the Phred quality plus 33. 
This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option> - <option value="--phred64" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option> + <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option> + <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option> </param> - <param name="solexa-quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/> - <param name="int-quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/> + <param name="solexa_quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default=False"/> + <param name="int_quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... 
Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified" help="--int-quals; default=False"/> </when> <when value="no"> <!-- do nothing --> @@ -316,25 +327,25 @@ </param> <when value="yes"> <param name="N" type="integer" min="0" max="1" value="0" label="Set the number of mismatches to be allowed in a seed alignment during multiseed alignment (see `Multiseed alignment` section of help below)" help="-N; Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity; default=0"/> - <param name="L" type="integer" min="0" value="20" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more senstive. Default: the `--sensitive` preset is used by default, which sets `-L` to 20 both in `--end-to-end` mode and in `--local` mode"/> - <param name="i" type="text" value="S,1,1.15" size="10" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. See also `Setting function options` below in help section. If the function returns a result less than 1, it is rounded up to 1. 
Default: the `--sensitive` preset is used by default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75` in `--local` mode"/> - <param name="n_ceil" type="text" value="`L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `-L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length. See also: [setting function options]. Reads exceeding this ceiling are [filtered out]. Default=`L,0,0.15`"/> - <param name="dpad" type="integer" min="0" value="15" lable="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/> + <param name="L" type="integer" min="0" max="32" value="22" label="Sets the length of the seed substrings to align during multiseed alignment (see `Multiseed alignment` section of help below)" help="-L; Smaller values make alignment slower but more sensitive. Default=22"/> + <param name="i" type="text" value="S,1,1.15" label="Set a function governing the interval between seed substrings to use during multiseed alignment (see `Multiseed alignment` section of help below). Also see description of this option below in the help section" help="-i; Since it's best to use longer intervals for longer reads, this parameter sets the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. If the function returns a result less than 1, it is rounded up to 1. 
Default=`S,1,1.15`"/> + <param name="n_ceil" type="text" value="L,0,0.15" label="Set a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length" help="--n-ceil; For instance, specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, where x is the read length. Reads exceeding this ceiling are filtered out. Default=`L,0,0.15`"/> + <param name="dpad" type="integer" min="0" value="15" label="Pad dynamic programming problems by that many columns on either side to allow gaps" help="--dpad; default=15"/> <param name="gbar" type="integer" min="0" value="4" label="Disallow gaps within that many positions of the beginning or end of the read" help="--gbar; default=4"/> - <param name="ignore-quals" type="boolean" truevalue="--ignore-quals" falsevalue="" selected="False" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/> + <param name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" selected="False" label="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value" help="--ignore-quals; input is treated as though all quality values are high; default=False"/> <param name="nofw" type="boolean" truevalue="--nofw" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the forward (Watson) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. 
Default=False"/> <param name="norc" type="boolean" truevalue="--norc" falsevalue="" selected="False" label="Do not attempt to align unpaired reads to the reverse (Crick) reference strand" help="In paired-end mode, `--nofw` and `--norc` pertain to the fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand. Default=False"/> - <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" selected="False" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help baelow)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the [multiseed heuristic]. Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments. However, this can lead to unexpected alignments when the user also sets options governing the [multiseed heuristic], like `-L` and `-N`. For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the [multiseed heuristic], which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/> + <param name="no_1mm_upfront" type="boolean" truevalue="--no-1mm-upfront" falsevalue="" selected="False" label="Prevent searching for 1-mismatch end-to-end alignments before using the multiseed heuristic (see `Multiseed alignment` section of help below)" help="--no-1mm-upfront; By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch end-to-end alignment for the read *before* trying the multiseed heuristic. 
Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments. However, this can lead to unexpected alignments when the user also sets options governing the multiseed heuristic, like `-L` and `-N`. For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end alignments before using the multiseed heuristic, which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed; Default=False"/> <conditional name="align_mode"> <param name="align_mode_selector" type="select" display="radio" label="Select between `--local` and `--end-to-end` alignment modes" help="--local and --end-to-end; see help below for detailed explanation; default=--end-to-end"> <option value="end-to-end" selected="True">End to End (--end-to-end)</option> <option value="local">Local (--local)</option> </param> <when value="end-to-end"> - <param name="score-min" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/> + <param name="score_min_ete" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. 
The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/> </when> <when value="local"> - <param name="score-min" type="text" value="L,-0.6,-0.6" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/> + <param name="score_min_loc" type="text" value="G,20,8" label="Set a function governing the minimum alignment score needed for an alignment to be considered `valid` (i.e. good enough to report)" help="--score-min; This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`"/> </when> </conditional> </when> @@ -349,7 +360,7 @@ </param> <when value="yes"> <param name="ma" type="integer" value="2" label="Set the match bonus" help="--ma; In `--local` mode match bonus is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in `--end-to-end` mode; Default=2"/> - <param name="mp" type="text" size="10" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`. If `--ignore-quals` is specified, the number subtracted quals `MX`. 
Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/> + <param name="mp" type="text" value="6,2" label="Set the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers" help="--mp; A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an `N`. If `--ignore-quals` is specified, the number subtracted quals `MX`. Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value; Default=6,2"/> <param name="np" type="integer" value="1" label="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as `N`" help="--np; Default=1"/> <param name="rdg_read_open" type="integer" value="5" label="Set the read gap opening penalty" help="--rdg; this is the first component of --rdg flag - opening penalty; Default=5"/> <param name="rdg_read_extend" type="integer" value="3" label="Set the read gap extension penalty" help="--rdg; this is the second component of --rdg flag - extension penalty; Default=3"/> @@ -369,10 +380,10 @@ <when value="no"> <!-- do nothing --> </when> - <when value="-k"> - <param name="k" type="integer" min="0" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detalied description of this option in the help section below. Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/> + <when value="k"> + <param name="k" type="integer" min="1" value="1" label="Searches for at most that many distinct, valid alignments for each read" help="-k; see detailed description of this option in the help section below. 
Note: Bowtie 2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow"/> </when> - <when value="-a"> + <when value="a"> <!-- do nothing here; set -a flag on the command line--> </when> </conditional> @@ -382,7 +393,7 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> - <param name="D" type="integer" value="15" min="0" label="Attemp that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment. This limit is automatically adjusted up when -k or -a are specified. Default=15"/> + <param name="D" type="integer" value="15" min="0" label="Attempt that many consecutive seed extension attempts to `fail` before Bowtie 2 moves on, using the alignments found so far" help="-D; A seed extension `fails` if it does not yield a new best or a new second-best alignment. This limit is automatically adjusted up when -k or -a are specified. Default=15"/> <param name="R" type="integer" value="2" min="0" label="Set the maximum number of times Bowtie 2 will `re-seed` reads with repetitive seeds" help="When `re-seeding`, Bowtie 2 simply chooses a new set of reads (same length, same number of mismatches allowed) at different offsets and searches for more alignments. A read is considered to have repetitive seeds if the total number of seed hits divided by the number of seeds that aligned at least once is greater than 300. 
Default=2"/> </when> <when value="no"> @@ -396,8 +407,8 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> - <param name="no-unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/> - <param name="omit-sec-seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/> + <param name="no_unal" type="boolean" truevalue="--no-unal" falsevalue="" label="Suppress SAM records for reads that failed to align" help="--no-unal; Default=False"/> + <param name="omit_sec_seq" type="boolean" truevalue="--omit-sec-seq" falsevalue="" label="Suppress SEQ and QUAL strings for secondary alignments" help="--omit-sec-seq; Default=False"/> </when> <when value="no"> <!-- do nothing --> @@ -411,12 +422,13 @@ <when value="yes"> <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" label="Guarantee that output SAM records are printed in an order corresponding to the order of the reads in the original input file" help="--reorder; Default=False"/> <param name="seed" type="integer" value="0" min="0" label="Use this number as the seed for pseudo-random number generator" help="--seed; Default=0"/> - <param name="non-deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="--non-deterministic; see Help below for explanation of this option; default=False"/> + <param name="non_deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="--non-deterministic; see Help below for explanation of this option; default=False"/> </when> <when value="no"> <!-- do nothing --> </when> </conditional> + <param name="sam_opt" type="boolean" truevalue="true" 
falsevalue="false" label="Would you like the output to be a SAM file" help="By default, the output from this Bowtie2 wrapper is a sorted BAM file."/> </when> </conditional> </inputs> @@ -428,21 +440,45 @@ <data format="fastqsanger" name="output_unaligned_reads_l" label="${tool.name} on ${on_string}: unaligned reads (L)" > <filter>library['unaligned_file'] is True</filter> <actions> - <action type="format"> - <option type="from_param" name="library.input_1" param_attribute="ext" /> - </action> + <conditional name="library.type"> + <when value="single"> + <action type="format"> + <option type="from_param" name="library.input_1" param_attribute="ext" /> + </action> + </when> + <when value="paired"> + <action type="format"> + <option type="from_param" name="library.input_1" param_attribute="ext" /> + </action> + </when> + <when value="paired_collection"> + <action type="format"> + <option type="from_param" name="library.input_1" param_attribute="forward.ext" /> + </action> + </when> + </conditional> </actions> </data> <data format="fastqsanger" name="output_unaligned_reads_r" label="${tool.name} on ${on_string}: unaligned reads (R)"> <filter>( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['unaligned_file'] is True</filter> <actions> - <action type="format"> - <option type="from_param" name="library.input_1" param_attribute="ext" /> - </action> + <conditional name="library.type"> + <when value="paired"> + <action type="format"> + <option type="from_param" name="library.input_2" param_attribute="ext" /> + </action> + </when> + <when value="paired_collection"> + <action type="format"> + <option type="from_param" name="library.input_1" param_attribute="reverse.ext" /> + </action> + </when> + </conditional> </actions> </data> - <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads in BAM format"> + <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads (sorted BAM)"> + 
<filter>analysis_type['analysis_type_selector'] == "simple" or analysis_type['sam_opt'] is False</filter> <actions> <conditional name="reference_genome.source"> <when value="indexed"> @@ -461,6 +497,28 @@ </conditional> </actions> </data> + + <data format="sam" name="output_sam" label="${tool.name} on ${on_string}: aligned reads (SAM)"> + <filter>analysis_type['analysis_type_selector'] == "full" and analysis_type['sam_opt'] is True</filter> + <actions> + <conditional name="reference_genome.source"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="bowtie2_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="reference_genome.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="reference_genome.own_file" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> <tests> @@ -477,6 +535,22 @@ <param name="own_file" value="bowtie2-ref.fasta" /> <output name="output" file="bowtie2-test1.bam" ftype="bam" lines_diff="2"/> </test> + <test> + <!-- basic test on single paired default run --> + <param name="type" value="paired"/> + <param name="selection" value="no"/> + <param name="paired_options_selector" value="no"/> + <param name="unaligned_file" value="false"/> + <param name="analysis_type_selector" value="simple"/> + <param name="rg_selector" value="set"/> + <param name="ID" value="rg1"/> + <param name="PL" value="CAPILLARY"/> + <param name="source" value="history" /> + <param name="input_1" value="bowtie2-fq1.fq" ftype="fastqsanger"/> + <param name="input_2" value="bowtie2-fq2.fq" ftype="fastqsanger"/> + <param name="own_file" value="bowtie2-ref.fasta" /> + <output name="output" file="bowtie2-test2.bam" ftype="bam" lines_diff="2"/> + </test> </tests> <help> @@ -541,7 +615,7 @@ 
pipelines. --phred64 - Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is + Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding. --solexa-quals @@ -551,7 +625,7 @@ --int-quals Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`.... - Integers are treated as being on the [Phred quality] scale unless + Integers are treated as being on the Phred quality scale unless `--solexa-quals` is also specified. Default: off. ------ @@ -591,19 +665,19 @@ **Alignment options**:: -N <int> - Sets the number of mismatches to allowed in a seed alignment during [multiseed - alignment]. Can be set to 0 or 1. Setting this higher makes alignment slower + Sets the number of mismatches allowed in a seed alignment during multiseed + alignment. Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity. Default: 0. -L <int> - Sets the length of the seed substrings to align during [multiseed alignment]. - Smaller values make alignment slower but more senstive. Default: the - `--sensitive` preset is used by default, which sets `-L` to 20 both in - `--end-to-end` mode and in `--local` mode. + Sets the length of the seed substrings to align during multiseed alignment. + Smaller values make alignment slower but more sensitive. Default: the + `--sensitive` preset is used by default, which sets `-L` to 22 in + `--end-to-end` mode and to 20 in `--local` mode. -i <func> Sets a function governing the interval between seed substrings to use during - [multiseed alignment]. For instance, if the read has 30 characers, and seed + multiseed alignment. 
For instance, if the read has 30 characters, and seed length is 10, and the seed interval is 6, the seeds extracted will be: Read: TAGCTACGCTCTACGCTATCATGCATAAAC @@ -620,7 +694,7 @@ the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. - See also: [setting function options]. If the function returns a result less than + If the function returns a result less than 1, it is rounded up to 1. Default: the `--sensitive` preset is used by default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75` in `--local` mode. @@ -629,8 +703,8 @@ Sets a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length. For instance, specifying `-L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, - where x is the read length. See also: [setting function options]. Reads - exceeding this ceiling are [filtered out]. Default: `L,0,0.15`. + where x is the read length. Reads exceeding this ceiling are filtered out. + Default: `L,0,0.15`. --dpad <int> "Pads" dynamic programming problems by `<int>` columns on either side to allow @@ -658,14 +732,14 @@ --no-1mm-upfront By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch - end-to-end alignment for the read *before* trying the [multiseed heuristic]. Such + end-to-end alignment for the read *before* trying the multiseed heuristic. Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments. However, this can lead to unexpected - alignments when the user also sets options governing the [multiseed heuristic], + alignments when the user also sets options governing the multiseed heuristic, like `-L` and `-N`. 
For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end - alignments before using the [multiseed heuristic], which leads to the expected + alignments before using the multiseed heuristic, which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed. @@ -721,8 +795,7 @@ Sets a function governing the minimum alignment score needed for an alignment to be considered "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` - to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting - function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and + to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`. ----- @@ -840,18 +913,15 @@ --dovetail If the mates "dovetail", that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that - to be concordant. See also: [Mates can overlap, contain or dovetail each - other]. Default: mates cannot dovetail in a concordant alignment. + to be concordant. Default: mates cannot dovetail in a concordant alignment. --no-contain If one mate alignment contains the other, consider that to be non-concordant. - See also: [Mates can overlap, contain or dovetail each other]. Default: a mate - can contain the other in a concordant alignment. + Default: a mate can contain the other in a concordant alignment. --no-overlap If one mate alignment overlaps the other at all, consider that to be - non-concordant. See also: [Mates can overlap, contain or dovetail each other]. - Default: mates can overlap in a concordant alignment. + non-concordant. 
Default: mates can overlap in a concordant alignment. ------ @@ -866,9 +936,9 @@ --rg <text> Add `<text>` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the `@RG` header line. Note: in order for the `@RG` line to appear, `--rg-id` - must also be specified. This is because the `ID` tag is required by the [SAM - Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the - [SAM Spec][SAM] for details about what fields are legal. + must also be specified. This is because the `ID` tag is required by the SAM + Specification. Specify `--rg` multiple times to set multiple fields. See the + SAM Specification for details about what fields are legal. --omit-sec-seq When printing secondary alignments, Bowtie 2 by default will write out the `SEQ`
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_group_macros.xml Tue Oct 13 12:13:27 2015 -0400 @@ -0,0 +1,294 @@ +<macros> + <!-- Import this at the top of your command block and then + define rg_auto_name. --> + <token name="@define_read_group_helpers@"> +#def identifier_or_name($input1) + #if hasattr($input1, 'element_identifier') + #return $input1.element_identifier + #else + #return $input1.name.rstrip('.gz').rstrip('.fastq').rstrip('.fq') + #end if +#end def + +#def clean(name) + #import re + #set $name_clean = re.sub('[^\w\-_\.]', '_', $name) + #return $name_clean +#end def + +#def read_group_name_default($input1, $input2=None) + #if $input2 is None + #return $clean($identifier_or_name($input1)) + #else + #import itertools + #set $input_name1 = $clean($identifier_or_name($input1)) + #set $input_name2 = $clean($identifier_or_name($input2)) + #set $common_prefix = ''.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), itertools.izip(*[$input_name1, $input_name2]))]) + #if len($common_prefix) > 3 + #return $common_prefix + #else + #return $input_name1 + #end if + #end if +#end def + +#def format_read_group(prefix, value, quote='', arg='') + #if $value + #return $arg + $quote + $prefix + $value + $quote + #else + #return '' + #end if +#end def + +#def rg_param(name) + #if $varExists("rg") + #return $rg.get($name, None) + #else + #return $getVar($name, None) + #end if +#end def + +#set $use_rg = True + </token> + <!-- preconditions use_rg and rg_auto_name have been + defined. 
+ --> + <token name="@set_read_group_vars@"> +#if $use_rg + #if $rg_param('read_group_id_conditional') is None + #set $rg_id = $rg_auto_name + #elif $rg_param('read_group_id_conditional').do_auto_name + #set $rg_id = $rg_auto_name + #else + #set $rg_id = str($rg_param('read_group_id_conditional').ID) + #end if + + #if $rg_param('read_group_sm_conditional') is None + #set $rg_sm = '' + #elif $rg_param('read_group_sm_conditional').do_auto_name + #set $rg_sm = $rg_auto_name + #else + #set $rg_sm = str($rg_param('read_group_sm_conditional').SM) + #end if + + #if $rg_param('PL') + #set $rg_pl = str($rg_param('PL')) + #else + #set $rg_pl = '' + #end if + + #if $rg_param('read_group_lb_conditional') is None + #set $rg_lb = '' + #elif $rg_param('read_group_lb_conditional').do_auto_name + #set $rg_lb = $rg_auto_name + #else + #set $rg_lb = str($rg_param('read_group_lb_conditional').LB) + #end if + + #if $rg_param('CN') + #set $rg_cn = str($rg_param('CN')) + #else + #set $rg_cn = '' + #end if + + #if $rg_param("DS") + #set $rg_ds = str($rg_param("DS")) + #else + #set $rg_ds = '' + #end if + + #if $rg_param("DT") + #set $rg_dt = str($rg_param("DT")) + #else + #set $rg_dt = '' + #end if + + #if $rg_param("FO") + #set $rg_fo = str($rg_param("FO")) + #else + #set $rg_fo = '' + #end if + + #if $rg_param("KS") + #set $rg_ks = str($rg_param("KS")) + #else + #set $rg_ks = '' + #end if + + #if $rg_param("PG") + #set $rg_pg = str($rg_param("PG")) + #else + #set $rg_pg = '' + #end if + + #if str($rg_param("PI")) + #set $rg_pi = str($rg_param("PI")) + #else + #set $rg_pi = '' + #end if + + #if $rg_param("PU") + #set $rg_pu = str($rg_param("PU")) + #else + #set $rg_pu = '' + #end if +#end if + </token> + <token name="@set_use_rg_var@"> +#set $use_rg = str($rg.rg_selector) != "do_not_set" + </token> + <xml name="read_group_auto_name_conditional"> + <param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this 
value" checked="no" /> + <when value="true"> + </when> + <when value="false"> + <yield /> + </when> + </xml> + <xml name="read_group_id_param"> + <param name="ID" type="text" value="" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment" optional="false"> + <validator type="empty_field" /> + </param> + </xml> + <xml name="read_group_id_conditional"> + <conditional name="read_group_id_conditional"> + <expand macro="read_group_auto_name_conditional"> + <expand macro="read_group_id_param" /> + </expand> + </conditional> + </xml> + <xml name="read_group_sm_param"> + <param name="SM" type="text" value="" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" /> + </xml> + <xml name="read_group_sm_conditional"> + <conditional name="read_group_sm_conditional"> + <expand macro="read_group_auto_name_conditional"> + <expand macro="read_group_sm_param" /> + </expand> + </conditional> + </xml> + <!-- Above SM param is optional (for SAM/BAM spec, this is required + as per Picard. + --> + <xml name="read_group_sm_param_required"> + <param name="SM" type="text" value="" label="Read group sample name (SM)" optional="false" help="This value should be descriptive. 
Use pool name where a pool is being sequenced"> + <validator type="empty_field" /> + </param> + </xml> + <xml name="read_group_sm_required_conditional"> + <conditional name="read_group_sm_conditional"> + <expand macro="read_group_auto_name_conditional"> + <expand macro="read_group_sm_param" /> + </expand> + </conditional> + </xml> + <xml name="read_group_pl_param"> + <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)"> + <option value="CAPILLARY">CAPILLARY</option> + <option value="LS454">LS454</option> + <option selected="True" value="ILLUMINA">ILLUMINA</option> + <option value="SOLID">SOLID</option> + <option value="HELICOS">HELICOS</option> + <option value="IONTORRENT">IONTORRENT</option> + <option value="PACBIO">PACBIO</option> + </param> + </xml> + <xml name="read_group_lb_param"> + <param name="LB" type="text" label="Library name (LB)" optional="true" /> + </xml> + <xml name="read_group_lb_conditional"> + <conditional name="read_group_lb_conditional"> + <expand macro="read_group_auto_name_conditional"> + <expand macro="read_group_lb_param" /> + </expand> + </conditional> + </xml> + <xml name="read_group_lb_required_param"> + <param name="LB" type="text" label="Library name (LB)" optional="false"> + <validator type="empty_field" /> + </param> + </xml> + <xml name="read_group_lb_required_conditional"> + <conditional name="read_group_lb_conditional"> + <expand macro="read_group_auto_name_conditional"> + <expand macro="read_group_lb_required_param" /> + </expand> + </conditional> + </xml> + <xml name="read_group_cn_param"> + <param name="CN" type="text" label="Sequencing center that produced the read (CN)" /> + </xml> + <xml name="read_group_ds_param"> + <param name="DS" type="text" label="Description (DS)" /> + </xml> + <xml name="read_group_dt_param"> + <param name="DT" type="text" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" /> + </xml> + <xml name="read_group_fo_param"> 
+ <param name="FO" type="text" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\*|[ACMGRSVTWYHKDBN]+/"> + <validator type="regex" message="Invalid flow order">\*|[ACMGRSVTWYHKDBN]+$</validator> + </param> + </xml> + <xml name="read_group_ks_param"> + <param name="KS" type="text" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" /> + </xml> + <xml name="read_group_pg_param"> + <param name="PG" type="text" label="Programs used for processing the read group (PG)" /> + </xml> + <xml name="read_group_pi_param"> + <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" /> + </xml> + <xml name="read_group_pu_param"> + <param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" /> + </xml> + <xml name="read_group_pu_required_param"> + <param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" /> + </xml> + <!-- Only ID is required - all groups available --> + <xml name="read_group_inputs_spec"> + <expand macro="read_group_id_conditional" /> + <expand macro="read_group_sm_conditional" /> + <expand macro="read_group_pl_param" /> + <expand macro="read_group_lb_conditional" /> + <expand macro="read_group_cn_param" /> + <expand macro="read_group_ds_param" /> + <expand macro="read_group_dt_param" /> + <expand macro="read_group_fo_param" /> + <expand macro="read_group_ks_param" /> + <expand macro="read_group_pg_param" /> + <expand macro="read_group_pi_param" /> + <expand macro="read_group_pu_param" /> + </xml> + <!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. 
--> + <xml name="read_group_inputs_picard"> + <expand macro="read_group_id_conditional" /> + <expand macro="read_group_sm_required_conditional" /> + <expand macro="read_group_lb_required_conditional" /> + <expand macro="read_group_pl_param" /> + <expand macro="read_group_pu_required_param" /> + <expand macro="read_group_cn_param" /> + <expand macro="read_group_ds_param" /> + <expand macro="read_group_pi_param" /> + <expand macro="read_group_dt_param" /> + </xml> + <xml name="read_group_conditional"> + <conditional name="rg"> + <param name="rg_selector" type="select" label="Set read groups information?" help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets."> + <option value="set">Set read groups (SAM/BAM specification)</option> + <option value="set_picard">Set read groups (Picard style)</option> + <option value="set_id_auto">Automatically assign ID</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set_picard"> + <expand macro="read_group_inputs_picard" /> + </when> + <when value="set"> + <expand macro="read_group_inputs_spec" /> + </when> + <when value="set_id_auto"> + </when> + <when value="do_not_set"> + </when> + </conditional> + </xml> +</macros>