Mercurial > repos > devteam > bwa
changeset 8:cbc665adcde4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa commit c355891532cecaab6b3288a148a6b3bcb5973396
| author | iuc | 
|---|---|
| date | Fri, 24 Nov 2017 09:55:28 -0500 | 
| parents | c022e4a68b76 | 
| children | a628f5606f68 | 
| files | bwa-mem.xml bwa.xml bwa_macros.xml | 
| diffstat | 3 files changed, 731 insertions(+), 837 deletions(-) [+] | 
line wrap: on
 line diff
--- a/bwa-mem.xml Tue Nov 21 11:23:45 2017 -0500 +++ b/bwa-mem.xml Fri Nov 24 09:55:28 2017 -0500 @@ -1,63 +1,65 @@ <?xml version="1.0"?> -<tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.0"> - <description>- map medium and long reads (> 100 bp) against reference genome</description> - <macros> - <import>read_group_macros.xml</import> - <import>bwa_macros.xml</import> - </macros> - <expand macro="requirements" /> - <expand macro="stdio" /> - <command> +<tool id="bwa_mem" name="Map with BWA-MEM" version="@VERSION@.1"> + <description>- map medium and long reads (> 100 bp) against reference genome</description> + <macros> + <import>read_group_macros.xml</import> + <import>bwa_macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command> <![CDATA[ - @set_reference_fasta_filename@ +@set_reference_fasta_filename@ - ## Begin BWA-MEM command line +## Begin BWA-MEM command line - bwa mem - -t "\${GALAXY_SLOTS:-1}" - -v 1 ## Verbosity is set to 1 (errors only) +bwa mem +-t "\${GALAXY_SLOTS:-1}" +## Verbosity is set to 1 (errors only) +-v 1 - #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option - -p - #if str( $fastq_input.iset_stats ): ## check that insert statistics is used - -I "${fastq_input.iset_stats}" - #end if +#if str( $fastq_input.fastq_input_selector ) == "paired_iv": + ## For interleaved fastq files set -p option + -p + ## check that insert statistics is used + #if str( $fastq_input.iset_stats ): + -I '${fastq_input.iset_stats}' #end if +#end if - #if str( $analysis_type.analysis_type_selector ) == "pacbio": - -x pacbio - #elif str( $analysis_type.analysis_type_selector ) == "ont2d": - -x ont2d - #elif str( $analysis_type.analysis_type_selector ) == "intractg": - -x intractg - #elif str( $analysis_type.analysis_type_selector ) == "full": - #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": ## Algorithmic options - -k 
"${analysis_type.algorithmic_options.k}" - -w "${analysis_type.algorithmic_options.w}" - -d "${analysis_type.algorithmic_options.d}" - -r "${analysis_type.algorithmic_options.r}" - -y "${analysis_type.algorithmic_options.y}" - -c "${analysis_type.algorithmic_options.c}" - -D "${analysis_type.algorithmic_options.D}" - -W "${analysis_type.algorithmic_options.W}" - -m "${analysis_type.algorithmic_options.m}" +#if str( $analysis_type.analysis_type_selector ) not in ["illumina", "full"]: + -x '$analysis_type.analysis_type_selector' +#elif str( $analysis_type.analysis_type_selector ) == "full": + ## Algorithmic options + #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": + -k '${analysis_type.algorithmic_options.k}' + -w '${analysis_type.algorithmic_options.w}' + -d '${analysis_type.algorithmic_options.d}' + -r '${analysis_type.algorithmic_options.r}' + -y '${analysis_type.algorithmic_options.y}' + -c '${analysis_type.algorithmic_options.c}' + -D '${analysis_type.algorithmic_options.D}' + -W '${analysis_type.algorithmic_options.W}' + -m '${analysis_type.algorithmic_options.m}' ${analysis_type.algorithmic_options.S} ${analysis_type.algorithmic_options.P} ${analysis_type.algorithmic_options.e} - #end if + #end if - #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": ## Scoring options - -A "${analysis_type.scoring_options.A}" - -B "${analysis_type.scoring_options.B}" - -O "${analysis_type.scoring_options.O}" - -E "${analysis_type.scoring_options.E}" - -L "${analysis_type.scoring_options.L}" - -U "${analysis_type.scoring_options.U}" - #end if + ## Scoring options + #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": + -A '${analysis_type.scoring_options.A}' + -B '${analysis_type.scoring_options.B}' + -O '${analysis_type.scoring_options.O}' + -E '${analysis_type.scoring_options.E}' + -L '${analysis_type.scoring_options.L}' + -U '${analysis_type.scoring_options.U}' + #end if - #if str( 
$analysis_type.io_options.io_options_selector ) == "set": ## IO options - -T "${analysis_type.io_options.T}" - -h "${analysis_type.io_options.h}" + ## IO options + #if str( $analysis_type.io_options.io_options_selector ) == "set": + -T '${analysis_type.io_options.T}' + -h '${analysis_type.io_options.h}' ${analysis_type.io_options.a} ${analysis_type.io_options.C} ${analysis_type.io_options.V} @@ -65,248 +67,247 @@ ${analysis_type.io_options.M} ${analysis_type.io_options.five} ${analysis_type.io_options.q} - #end if - - #end if - - ## Handle read group options... - @define_read_group_helpers@ - #if str( $fastq_input.fastq_input_selector ) == "paired": - #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2) - #else: - #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1) - #end if - @set_use_rg_var@ - @set_read_group_vars@ - #if $use_rg - @set_rg_string@ - -R '$rg_string' #end if - #if str( $fastq_input.fastq_input_selector ) == "paired": - #if str( $fastq_input.iset_stats ): ## check that insert statistics is used - -I "${fastq_input.iset_stats}" - #end if +#end if - "${reference_fasta_filename}" - "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" - #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": - #if str( $fastq_input.iset_stats ): ## check that insert statistics is used - -I "${fastq_input.iset_stats}" - #end if +## Handle read group options... 
+@define_read_group_helpers@ +#if str( $fastq_input.fastq_input_selector ) == "paired": + #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2) +#else: + #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1) +#end if +@set_use_rg_var@ +@set_read_group_vars@ +#if $use_rg + @set_rg_string@ + -R '$rg_string' +#end if - "${reference_fasta_filename}" - "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}" - #else: - "${reference_fasta_filename}" - "${fastq_input.fastq_input1}" +#if str( $fastq_input.fastq_input_selector ) == "paired": + ## check that insert statistics is used + #if str( $fastq_input.iset_stats ): + -I '${fastq_input.iset_stats}' + #end if + + '${reference_fasta_filename}' + '${fastq_input.fastq_input1}' '${fastq_input.fastq_input2}' +#elif str( $fastq_input.fastq_input_selector ) == "paired_collection": + ## check that insert statistics is used + #if str( $fastq_input.iset_stats ): + -I '${fastq_input.iset_stats}' #end if - | samtools sort -O bam -o '$bam_output' -]]> - </command> + '${reference_fasta_filename}' + '${fastq_input.fastq_input1.forward}' '${fastq_input.fastq_input1.reverse}' +#else: + '${reference_fasta_filename}' + '${fastq_input.fastq_input1}' +#end if - <inputs> - <expand macro="reference_source_conditional" /> - <conditional name="fastq_input"> - <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> - <option value="paired">Paired</option> - <option value="single">Single</option> - <option value="paired_collection">Paired Collection</option> - <option value="paired_iv">Paired Interleaved</option> - </param> - <when value="paired"> - <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> - <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" 
label="Select second set of reads" help="Specify dataset with reverse reads"/> - <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. See below for details."> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> - </param> - </when> - <when value="single"> - <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/> - </when> - <when value="paired_collection"> - <param name="fastq_input1" format="fastqsanger,fastqsanger.gz,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> - <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. See below for details."> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> - </param> - </when> - <when value="paired_iv"> - <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> - <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. 
See below for details."> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> - </param> - </when> - </conditional> +| samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '$bam_output' +]]> + </command> - <expand macro="read_group_conditional" /> - - <conditional name="analysis_type"> - <param name="analysis_type_selector" type="select" label="Select analysis mode"> - <option value="illumina">1.Simple Illumina mode</option> - <option value="pacbio">2.PacBio mode (-x pacbio)</option> - <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option> - <option value="intractg">4.Intra-species contigs mode (-x intractg)</option> - <option value="full">5.Full list of options</option> - </param> - <when value="illumina"> - <!-- do nothing --> - </when> - <when value="pacbio"> - <!-- do nothing. all magic happens within <command> tag --> - </when> - <when value="ont2d"> - <!-- do nothing. all magic happens within <command> tag --> - </when> - <when value="intractg"> - <!-- do nothing. all magic happens within <command> tag --> - </when> - <when value="full"> - <conditional name="algorithmic_options"> - <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options."> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/> - <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/> - <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/> - <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. 
Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" /> - <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" /> - <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/> - <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/> - <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/> - <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/> - <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/> - <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/> - <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> + <inputs> + <expand macro="reference_source_conditional" /> + <conditional name="fastq_input"> + <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> + <option value="paired">Paired</option> + <option value="single">Single</option> + <option value="paired_collection">Paired Collection</option> + <option value="paired_iv">Paired Interleaved</option> + </param> + <when value="paired"> + <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> + <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/> + <param name="iset_stats" 
type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. See below for details."> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + </when> + <when value="single"> + <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/> + </when> + <when value="paired_collection"> + <param name="fastq_input1" format="fastqsanger,fastqsanger.gz,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> + <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. See below for details."> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + </when> + <when value="paired_iv"> + <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> + <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. 
See below for details."> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + </when> </conditional> - <conditional name="scoring_options"> - <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options."> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/> - <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/> - <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6"> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> - </param> - <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost '-O + -E*k'. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1"> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> - </param> - <param name="L" type="text" value="5,5" label="Penalties for 5'-end and 3'-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. 
Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced"> - <sanitizer invalid_char=""> - <valid initial="string.digits"><add value=","/> </valid> - </sanitizer> + <expand macro="read_group_conditional" /> + + <conditional name="analysis_type"> + <param name="analysis_type_selector" type="select" label="Select analysis mode"> + <option value="illumina">1.Simple Illumina mode</option> + <option value="pacbio">2.PacBio mode (-x pacbio)</option> + <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option> + <option value="intractg">4.Intra-species contigs mode (-x intractg)</option> + <option value="full">5.Full list of options</option> </param> - <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> + <when value="illumina"> + <!-- do nothing --> + </when> + <when value="pacbio"> + <!-- do nothing. all magic happens within <command> tag --> + </when> + <when value="ont2d"> + <!-- do nothing. all magic happens within <command> tag --> + </when> + <when value="intractg"> + <!-- do nothing. all magic happens within <command> tag --> + </when> + <when value="full"> + <conditional name="algorithmic_options"> + <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" 
help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options."> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/> + <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/> + <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/> + <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" /> + <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" /> + <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/> + <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/> + <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/> + <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/> + <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/> + <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/> + <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> - <conditional name="io_options"> - <param name="io_options_selector" type="select" 
label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options."> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" label="For split alignment, take alignment with smallest coordinate as primary" help="Useful for HiC data"/> - <param argument="-q" type="boolean" truevalue="-q" falsevalue="" label="Don't lower MAPQ for split alignment" help="By default the MAPQ score of a supplementary alignment will be lowered to the primary alignment score."/> - <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/> - <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score >80% of the max score, output them all in the XA tag" help="-h; default=5" /> - <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/> - <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/> - <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-C"/> - <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" /> - <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard<1.96 compatibility" /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> + <conditional name="scoring_options"> + <param name="scoring_options_selector" 
type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options."> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/> + <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/> + <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6"> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost '-O + -E*k'. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1"> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + <param name="L" type="text" value="5,5" label="Penalties for 5'-end and 3'-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. 
Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced"> + <sanitizer invalid_char=""> + <valid initial="string.digits"><add value=","/> </valid> + </sanitizer> + </param> + <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + + <conditional name="io_options"> + <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options."> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" label="For split alignment, take alignment with smallest coordinate as primary" help="Useful for HiC data"/> + <param argument="-q" type="boolean" truevalue="-q" falsevalue="" label="Don't lower MAPQ for split alignment" help="By default the MAPQ score of a supplementary alignment will be lowered to the primary alignment score."/> + <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/> + <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score >80% of the max score, output them all in the XA tag" help="-h; default=5" /> + <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/> + <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/> + <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-C"/> + <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; 
By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" /> + <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard<1.96 compatibility" /> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </conditional> + </when> </conditional> - </when> - </conditional> - </inputs> + </inputs> - <outputs> - <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> - <expand macro="dbKeyActionsBwaMem" /> - </data> - </outputs> + <outputs> + <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> + <expand macro="dbKeyActionsBwaMem" /> + </data> + </outputs> - <tests> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="fastq_input_selector" value="paired"/> - <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="fastq_input_selector" value="single"/> - <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="fastq_input_selector" 
value="paired"/> - <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="index_a" value="is"/> - <param name="fastq_input_selector" value="paired"/> - <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="rg_selector" value="set"/> - <param name="ID" value="rg1"/> - <param name="PL" value="CAPILLARY"/> - <param name="LB" value="AARDVARK-1" /> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> - </test> - </tests> - <help> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="fastq_input_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="fastq_input_selector" value="single"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="2" /> + </test> + <test> + <param 
name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="fastq_input_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="index_a" value="is"/> + <param name="fastq_input_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="rg_selector" value="set"/> + <param name="ID" value="rg1"/> + <param name="PL" value="CAPILLARY"/> + <param name="LB" value="AARDVARK-1" /> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> + </test> + </tests> + <help><![CDATA[ **What is does** From http://arxiv.org/abs/1303.3997: -BWA-MEM is a new alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human. +BWA-MEM is an alignment algorithm for aligning sequence reads or long query sequences against a large reference genome such as human. It automatically chooses between local and end-to-end alignments, supports paired-end reads and performs chimeric alignment. The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases. -For mapping 100bp sequences, BWA-MEM shows better performance than several state-of-art read aligners to date. -It is best suited for mapping long (>70 nt) reads against large reference genomes. 
- -This Galaxy tool wraps bwa-mem module of bwa read mapping tool. Galaxy implementation takes fastq files as input and produces output in BAM (not SAM) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). +This Galaxy tool wraps bwa-mem module of bwa read mapping tool. The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). ----- @@ -314,9 +315,9 @@ Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: - 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. + 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`. - + If your genome of interest is not listed here you have two choices: 1. 
Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added @@ -328,74 +329,23 @@ Galaxy allows four levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are: - 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] - 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format> + 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] + 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format> 3. *Full list of options*: Allows access to all options through Galaxy interface. 
------- - -**BWA MEM options** - -Each Galaxy parameter widget corresponds to command line flags listed below: - -Algorithm options:: - - -k INT minimum seed length [19] - -w INT band width for banded alignment [100] - -d INT off-diagonal X-dropoff [100] - -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] - -y INT find MEMs longer than {-k} * {-r} with size less than INT [0] - -c INT skip seeds with more than INT occurrences [500] - -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] - -W INT discard a chain if seeded bases shorter than INT [0] - -m INT perform at most INT rounds of mate rescues for each read [50] - -S skip mate rescue - -P skip pairing; mate rescue performed unless -S also in use - -e discard full-length exact matches - -Scoring options:: - - -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] - -B INT penalty for a mismatch [4] - -O INT[,INT] gap open penalties for deletions and insertions [6,6] - -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] - -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] - -U INT penalty for an unpaired read pair [17] - -Input/output options:: - - -p first query file consists of interleaved paired-end sequences - -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] - - -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3] - -T INT minimum score to output [30] - -h INT if there are <INT hits with score >80% of the max score, output all in XA [5] - -a output all alignments for SE or unpaired PE - -C append FASTA/FASTQ comment to SAM output - -V output the reference FASTA header in the XR tag - -Y use soft clipping for supplementary alignments - -M mark shorter split hits as secondary - - -I FLOAT[,FLOAT[,INT[,INT]]] - specify the mean, standard deviation (10% of the mean if absent), max - (4 sigma from the mean if absent) and min of the insert size 
distribution. - FR orientation only. [inferred] - -@dataset_collections@ - @RG@ @info@ - </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btp324</citation> - <citation type="doi">10.1093/bioinformatics/btp698</citation> - <citation type="bibtex">@misc{1303.3997, -Author = {Heng Li}, -Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, -Year = {2013}, -Eprint = {arXiv:1303.3997}, -url = {http://arxiv.org/abs/1303.3997}, -}</citation> - </citations> + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btp324</citation> + <citation type="doi">10.1093/bioinformatics/btp698</citation> + <citation type="bibtex">@misc{1303.3997, + Author = {Heng Li}, + Title = {Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM}, + Year = {2013}, + Eprint = {arXiv:1303.3997}, + url = {http://arxiv.org/abs/1303.3997}, + }</citation> + </citations> </tool>
--- a/bwa.xml Tue Nov 21 11:23:45 2017 -0500 +++ b/bwa.xml Fri Nov 24 09:55:28 2017 -0500 @@ -1,457 +1,412 @@ <?xml version="1.0"?> -<tool id="bwa" name="Map with BWA" version="@VERSION@.2"> - <description>- map short reads (< 100 bp) against reference genome</description> - <macros> - <import>read_group_macros.xml</import> - <import>bwa_macros.xml</import> - <token name="@command_options@"> - #if str( $analysis_type.analysis_type_selector ) == "full": - -n ${analysis_type.n} - -o ${analysis_type.o} - -e ${analysis_type.e} - -i ${analysis_type.i} - -d ${analysis_type.d} - -l ${analysis_type.l} - -k ${analysis_type.k} - -m ${analysis_type.m} - -M ${analysis_type.M} - -O ${analysis_type.O} - -E ${analysis_type.E} - -R ${analysis_type.R} - -q ${analysis_type.q} - - #if str( $analysis_type.B ): +<tool id="bwa" name="Map with BWA" version="@VERSION@.3"> + <description>- map short reads (< 100 bp) against reference genome</description> + <macros> + <import>read_group_macros.xml</import> + <import>bwa_macros.xml</import> + <token name="@command_options@"> +#if str( $analysis_type.analysis_type_selector ) == "full": + -n ${analysis_type.n} + -o ${analysis_type.o} + -e ${analysis_type.e} + -i ${analysis_type.i} + -d ${analysis_type.d} + -l ${analysis_type.l} + -k ${analysis_type.k} + -m ${analysis_type.m} + -M ${analysis_type.M} + -O ${analysis_type.O} + -E ${analysis_type.E} + -R ${analysis_type.R} + -q ${analysis_type.q} + #if str( $analysis_type.B ): -B ${analysis_type.B} - #end if - - #if str( $analysis_type.L ): + #end if + #if str( $analysis_type.L ): -L ${analysis_type.L} - #end if #end if - </token> - <token name="@read_group_options@"> - #if $use_rg: - @set_rg_string@ - -r '$rg_string' - #end if - </token> +#end if + </token> + <token name="@read_group_options@"> +#if $use_rg: + @set_rg_string@ + -r '$rg_string' +#end if + </token> + <xml name="advanced_pe_options"> + <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" 
+ help="Provides additional controls"> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="a" type="integer" value="500" + label="Maximum insert size for a read pair to be considered being mapped properly." + help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/> + <param name="o" type="integer" value="100000" + label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." + help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/> + <param name="n" type="integer" value="3" + label="Maximum number of alignments to output in the XA tag for reads paired properly." + help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/> + <param name="N" type="integer" value="10" + label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)." + help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/> + <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)" + help="sampe -c"/> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </xml> + <xml name="advanced_se_options"> + <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" + help="Provides additional controls"> + <option value="set">Set</option> + <option value="do_not_set" selected="True">Do not set</option> + </param> + <when value="set"> + <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." 
+ help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/> + </when> + <when value="do_not_set"> + <!-- do nothing --> + </when> + </xml> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command> + <![CDATA[ +@set_reference_fasta_filename@ - <xml name="advanced_pe_options"> - <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" help="Provides additional controls"> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="a" type="integer" value="500" label="Maximum insert size for a read pair to be considered being mapped properly." help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/> - <param name="o" type="integer" value="100000" label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/> - <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly." help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/> - <param name="N" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)." help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/> - <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)" help="sampe -c"/> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </xml> - <xml name="advanced_se_options"> - <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" 
help="Provides additional controls"> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </xml> - </macros> - <expand macro="requirements" /> - <expand macro="stdio" /> - <command> -<![CDATA[ - @set_reference_fasta_filename@ +## setup vars for rg handling... +@define_read_group_helpers@ +#if str( $input_type.input_type_selector ) == "paired": + #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2) +#elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]: + #set $rg_auto_name = $read_group_name_default($input_type.bam_input) +#else + #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1) +#end if +@set_use_rg_var@ +@set_read_group_vars@ - ## setup vars for rg handling... 
- @define_read_group_helpers@ - #if str( $input_type.input_type_selector ) == "paired": - #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2) - #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]: - #set $rg_auto_name = $read_group_name_default($input_type.bam_input) - #else - #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1) - #end if - @set_use_rg_var@ - @set_read_group_vars@ - - ## Begin bwa command line +## Begin bwa command line ####### Fastq paired - #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection": - bwa aln - -t "\${GALAXY_SLOTS:-1}" - - @command_options@ - - "${reference_fasta_filename}" - - #if str( $input_type.input_type_selector ) == "paired_collection": - "${input_type.fastq_input1.forward}" - #else - "${input_type.fastq_input1}" - #end if - - > first.sai && +#if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection": + bwa aln + -t "\${GALAXY_SLOTS:-1}" + @command_options@ + '$reference_fasta_filename' + #if str( $input_type.input_type_selector ) == "paired_collection": + '${input_type.fastq_input1.forward}' + #else + '${input_type.fastq_input1}' + #end if + > first.sai && - bwa aln - -t "\${GALAXY_SLOTS:-1}" - - @command_options@ - - "${reference_fasta_filename}" + bwa aln + -t "\${GALAXY_SLOTS:-1}" + @command_options@ + '${reference_fasta_filename}' + #if str( $input_type.input_type_selector ) == "paired_collection": + '${input_type.fastq_input1.reverse}' + #else + '${input_type.fastq_input2}' + #end if + > second.sai && - #if str( $input_type.input_type_selector ) == "paired_collection": - "${input_type.fastq_input1.reverse}" - #else - "${input_type.fastq_input2}" - #end if - - > second.sai && - - bwa sampe - - #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "True": + bwa sampe + #if str( 
$input_type.adv_pe_options.adv_pe_options_selector) == "True": -a ${$input_type.adv_pe_options.a} -o ${$input_type.adv_pe_options.o} -n ${$input_type.adv_pe_options.n} -N ${$input_type.adv_pe_options.N} - #end if - - @read_group_options@ + #end if + @read_group_options@ + #if str( $input_type.input_type_selector ) == "paired_collection": + '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1.forward}' '${input_type.fastq_input1.reverse}' + #else: + '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1}' '${input_type.fastq_input2}' + #end if - #if str( $input_type.input_type_selector ) == "paired_collection": - "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1.forward}" "${input_type.fastq_input1.reverse}" - #else: - "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1}" "${input_type.fastq_input2}" - #end if + ## Fastq single -####### Fastq single - - #elif str( $input_type.input_type_selector ) == "single": - bwa aln - -t "\${GALAXY_SLOTS:-1}" +#elif str( $input_type.input_type_selector ) == "single": + bwa aln + -t "\${GALAXY_SLOTS:-1}" - @command_options@ + @command_options@ - "${reference_fasta_filename}" - "${input_type.fastq_input1}" - > first.sai && - - bwa samse + '${reference_fasta_filename}' + '${input_type.fastq_input1}' + > first.sai && - #if str( $input_type.adv_se_options.adv_se_options_selector) == "True": - -n ${$input_type.adv_se_options.n} - #end if + bwa samse - @read_group_options@ - - "${reference_fasta_filename}" first.sai "${input_type.fastq_input1}" + #if str( $input_type.adv_se_options.adv_se_options_selector) == "True": + -n ${$input_type.adv_se_options.n} + #end if + @read_group_options@ + '${reference_fasta_filename}' first.sai '${input_type.fastq_input1}' ####### BAM paired - #elif str( $input_type.input_type_selector ) == "paired_bam": - bwa aln - -t "\${GALAXY_SLOTS:-1}" - -b - -1 - - @command_options@ - - 
"${reference_fasta_filename}" - "${input_type.bam_input}" - > first.sai && +#elif str( $input_type.input_type_selector ) == "paired_bam": + bwa aln + -t "\${GALAXY_SLOTS:-1}" + -b + -1 + @command_options@ + '${reference_fasta_filename}' + '${input_type.bam_input}' + > first.sai && - bwa aln - -t "\${GALAXY_SLOTS:-1}" - -b - -2 - @command_options@ - "${reference_fasta_filename}" - "${input_type.bam_input}" - > second.sai && + bwa aln + -t "\${GALAXY_SLOTS:-1}" + -b + -2 + @command_options@ + '${reference_fasta_filename}' + '${input_type.bam_input}' + > second.sai && - bwa sampe + bwa sampe - #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "True": + #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "True": -a ${$input_type.adv_bam_pe_options.a} -o ${$input_type.adv_bam_pe_options.o} -n ${$input_type.adv_bam_pe_options.n} -N ${$input_type.adv_bam_pe_options.N} - #end if - - @read_group_options@ - - "${reference_fasta_filename}" first.sai second.sai "${input_type.bam_input}" "${input_type.bam_input}" + #end if + @read_group_options@ + '${reference_fasta_filename}' first.sai second.sai '${input_type.bam_input}' '${input_type.bam_input}' ####### Fastq single ------------ to do next - #elif str( $input_type.input_type_selector ) == "single_bam": - bwa aln - -t "\${GALAXY_SLOTS:-1}" - -b - -0 - - @command_options@ - - "${reference_fasta_filename}" - "${input_type.bam_input}" - > first.sai && - - bwa samse - - #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "True": - -n ${$input_type.adv_bam_se_options.n} - #end if - - @read_group_options@ +#elif str( $input_type.input_type_selector ) == "single_bam": + bwa aln + -t "\${GALAXY_SLOTS:-1}" + -b + -0 - "${reference_fasta_filename}" first.sai "${input_type.bam_input}" - #end if - - | samtools sort -O bam -o '$bam_output' -]]> - </command> + @command_options@ - <inputs> - <expand macro="reference_source_conditional" /> - <conditional name="input_type"> - <param 
name="input_type_selector" type="select" label="Select input type" help="Select between fastq and bam datasets and between paired and single end data"> - <option value="paired">Paired fastq</option> - <option value="paired_collection">Paired fastq collection</option> - <option value="single">Single fastq</option> - <option value="paired_bam">Paired BAM</option> - <option value="single_bam">Single BAM</option> - </param> - <when value="paired"> - <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> - <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/> - <conditional name="adv_pe_options"> + '${reference_fasta_filename}' + '${input_type.bam_input}' + > first.sai && - <expand macro="advanced_pe_options" /> - - </conditional> - </when> - - <when value="paired_collection"> - <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> - <conditional name="adv_pe_options"> - - <expand macro="advanced_pe_options" /> - - </conditional> - </when> - - <when value="single"> - <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/> - <conditional name="adv_se_options"> - - <expand macro="advanced_se_options" /> + bwa samse - </conditional> - </when> - - <!-- the difference between single and paired bams is in the <command> tag portion and realated to -0, -1, and -2 options --> - - <when value="paired_bam"> - <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with paired reads"/> - <conditional name="adv_bam_pe_options"> - - <expand macro="advanced_pe_options" /> + #if 
str( $input_type.adv_bam_se_options.adv_se_options_selector) == "True": + -n ${$input_type.adv_bam_se_options.n} + #end if + @read_group_options@ + '${reference_fasta_filename}' first.sai '${input_type.bam_input}' +#end if - </conditional> - </when> - - <when value="single_bam"> - <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with single reads"/> - <conditional name="adv_bam_se_options"> - - <expand macro="advanced_se_options" /> - - </conditional> - </when> +| samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '$bam_output' +]]> + </command> - </conditional> - - <expand macro="read_group_conditional" /> - - <conditional name="analysis_type"> - <param name="analysis_type_selector" type="select" label="Select analysis mode"> - <option value="illumina">1.Simple Illumina mode</option> - <option value="full">2.Full list of options</option> - </param> - <when value="illumina"> - <!-- do nothing --> - </when> - <when value="full"> - <param name="n" type="text" value="0.04" label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." 
help="aln -n; default=0.04"/> - <param name="o" type="integer" value="1" label="maximum number or gap openings" help="aln -o; default=1"/> - <param name="e" type="integer" value="-1" label="maximum number of gap extensions" help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/> - <param name="i" type="integer" value="5" label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/> - <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" help="aln -d; default=10"/> - <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/> - <param name="k" type="integer" value="2" label="maximum differences in the seed" help="aln -k; default=2"/> - <param name="m" type="integer" value="2000000" label="maximum entries in the queue" help="aln -m; default=2000000"/> - <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/> - <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/> - <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/> - <param name="R" type="integer" value="30" label="stop searching when there are more than this value of equally best hits" help="aln -R; default=30"/> - <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" help="aln -q; default=0"/> - <param name="B" type="integer" optional="True" label="length of barcode" help="aln -B; optional parameter"/> - <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" help="aln -L; optional parameter"/> - </when> - </conditional> - </inputs> - - <outputs> - <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> - <expand macro="dbKeyActionsBwa" /> - </data> - </outputs> - - <tests> - <test> - <param name="reference_source_selector" 
value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="input_type_selector" value="single"/> - <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="input_type_selector" value="paired"/> - <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="input_type_selector" value="paired"/> - <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="input_type_selector" value="paired_bam"/> - <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> - <param name="input_type_selector" value="paired"/> - <param 
name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> - <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> - <param name="rg_selector" value="set"/> - <param name="ID" value="rg1"/> - <param name="PL" value="CAPILLARY"/> - <param name="analysis_type_selector" value="illumina"/> - <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2" /> - </test> - </tests> - <help> + <inputs> + <expand macro="reference_source_conditional"/> + <conditional name="input_type"> + <param name="input_type_selector" type="select" label="Select input type" + help="Select between fastq and bam datasets and between paired and single end data"> + <option value="paired">Paired fastq</option> + <option value="paired_collection">Paired fastq collection</option> + <option value="single">Single fastq</option> + <option value="paired_bam">Paired BAM</option> + <option value="single_bam">Single BAM</option> + </param> + <when value="paired"> + <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" + label="Select first set of reads" help="Specify dataset with forward reads"/> + <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" + label="Select second set of reads" help="Specify dataset with reverse reads"/> + <conditional name="adv_pe_options"> + <expand macro="advanced_pe_options"/> + </conditional> + </when> + <when value="paired_collection"> + <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" + help="See help section for an explanation of dataset collections"/> + <conditional name="adv_pe_options"> + <expand macro="advanced_pe_options"/> + </conditional> + </when> + <when value="single"> + <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" + label="Select fastq dataset" help="Specify dataset with single reads"/> + <conditional name="adv_se_options"> + 
<expand macro="advanced_se_options"/> + </conditional> + </when> + <!-- the difference between single and paired bams is in the <command> tag portion and realated to -0, -1, and -2 options --> + <when value="paired_bam"> + <param name="bam_input" type="data" format="bam" label="Select BAM dataset" + help="Specify BAM dataset with paired reads"/> + <conditional name="adv_bam_pe_options"> + <expand macro="advanced_pe_options"/> + </conditional> + </when> + <when value="single_bam"> + <param name="bam_input" type="data" format="bam" label="Select BAM dataset" + help="Specify BAM dataset with single reads"/> + <conditional name="adv_bam_se_options"> + <expand macro="advanced_se_options"/> + </conditional> + </when> + </conditional> + <expand macro="read_group_conditional"/> + <conditional name="analysis_type"> + <param name="analysis_type_selector" type="select" label="Select analysis mode"> + <option value="illumina">1.Simple Illumina mode</option> + <option value="full">2.Full list of options</option> + </param> + <when value="illumina"> + <!-- do nothing --> + </when> + <when value="full"> + <param name="n" type="text" value="0.04" + label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." 
+ help="aln -n; default=0.04"/> + <param name="o" type="integer" value="1" label="maximum number or gap openings" + help="aln -o; default=1"/> + <param name="e" type="integer" value="-1" label="maximum number of gap extensions" + help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/> + <param name="i" type="integer" value="5" + label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/> + <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" + help="aln -d; default=10"/> + <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/> + <param name="k" type="integer" value="2" label="maximum differences in the seed" + help="aln -k; default=2"/> + <param name="m" type="integer" value="2000000" label="maximum entries in the queue" + help="aln -m; default=2000000"/> + <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/> + <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/> + <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/> + <param name="R" type="integer" value="30" + label="stop searching when there are more than this value of equally best hits" + help="aln -R; default=30"/> + <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" + help="aln -q; default=0"/> + <param name="B" type="integer" optional="True" label="length of barcode" + help="aln -B; optional parameter"/> + <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" + help="aln -L; optional parameter"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> + <expand macro="dbKeyActionsBwa"/> + </data> + </outputs> + <tests> + <test> + <param 
name="reference_source_selector" value="history"/> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="input_type_selector" value="single"/> + <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2"/> + </test> + <test> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="input_type_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/> + </test> + <test> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="input_type_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/> + </test> + <test> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="input_type_selector" value="paired_bam"/> + <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2"/> + </test> + <test> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="input_type_selector" 
value="paired"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="rg_selector" value="set"/> + <param name="ID" value="rg1"/> + <param name="PL" value="CAPILLARY"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2"/> + </test> + </tests> + <help><![CDATA[ **What is does** -BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as a separate Galaxy tool. +BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the +human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use +the separate BWA-MEM Galaxy tool. This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool: - - **bwa aln** - actual mapper placing reads onto the reference sequence - - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads - - **bam sampe** - post-processor for paired reads +- **bwa aln** - actual mapper placing reads onto the reference sequence +- **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for + single reads +- **bam sampe** - post-processor for paired reads -Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by the bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard). 
+ +The Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM format, +which can be further processed using various BAM utilities exiting in Galaxy (BAMTools, SAMTools, Picard). ----- **Indices: Selecting reference genomes for BWA** -Galaxy wrapper for BWA allows you select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: +The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes +using the **Will you select a reference genome from your history or use a built-in index?** select box. + +This select box has two options: - 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. - 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa aln`. - + 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select + reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility + and are ready to be mapped against. + 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select + reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your + current history. 
If your genome of interest is uploaded into history it will be shown there. Selecting a genome + from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run + mapping with `bwa aln`. + + If your genome of interest is not listed here you have two choices: - 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added - 2. Upload your genome of interest as a FASTA file to Galaxy history and selected **Use a genome from the history and build index** option. - ------ - -**Galaxy-specific option** - -Galaxy allows three levels of control over bwa-mem options provided by **Select analysis mode** menu option. These are: - - 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it alignes single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] - 2. *Full list of options*: Allows access to all options through Galaxy interface. - ------- - -**bwa-aln options** - -Each Galaxy parameter widget corresponds to command line flags listed below:: + 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index + needs to be added + 2. Upload your genome of interest as a FASTA file to Galaxy history and selected **Use a genome from the history + and build index** option. 
- -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04] - -o INT maximum number or fraction of gap opens [1] - -e INT maximum number of gap extensions, -1 for disabling long gaps [-1] - -i INT do not put an indel within INT bp towards the ends [5] - -d INT maximum occurrences for extending a long deletion [10] - -l INT seed length [32] - -k INT maximum differences in the seed [2] - -m INT maximum entries in the queue [2000000] - -M INT mismatch penalty [3] - -O INT gap open penalty [11] - -E INT gap extension penalty [4] - -R INT stop searching when there are >INT equally best hits [30] - -q INT quality threshold for read trimming down to 35bp [0] - -B INT length of barcode - -L log-scaled gap penalty for long deletions - -N non-iterative mode: search for all n-difference hits (slooow) - -I the input is in the Illumina 1.3+ FASTQ-like format - -b the input read file is in the BAM format - -0 use single-end reads only (effective with -b) - -1 use the 1st read in a pair (effective with -b) - -2 use the 2nd read in a pair (effective with -b) - -**bwa-samse options**:: - - -a INT maximum insert size [500] - -o INT maximum occurrences for one end [100000] - -n INT maximum hits to output for paired reads [3] - -N INT maximum hits to output for discordant pairs [10] - -c FLOAT prior of chimeric rate (lower bound) [1.0e-05] - -r STR read group header line [null] - -**bwa-sampe options**:: - - -n INT maximum hits to output for paired reads [3] - -r STR read group header line [null] - -@dataset_collections@ @RG@ @info@ - </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btp324</citation> - <citation type="doi">10.1093/bioinformatics/btp698</citation> - </citations> + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btp324</citation> + <citation type="doi">10.1093/bioinformatics/btp698</citation> + </citations> </tool>
--- a/bwa_macros.xml Tue Nov 21 11:23:45 2017 -0500 +++ b/bwa_macros.xml Fri Nov 24 09:55:28 2017 -0500 @@ -1,115 +1,115 @@ <macros> - <import>read_group_macros.xml</import> + <import>read_group_macros.xml</import> - <token name="@VERSION@">0.7.17</token> + <token name="@VERSION@">0.7.17</token> - <token name="@set_rg_string@"> - #set $rg_string = "@RG\\tID:" + str($rg_id) - #set $rg_string += $format_read_group("\\tSM:", $rg_sm) - #set $rg_string += $format_read_group("\\tPL:", $rg_pl) - #set $rg_string += $format_read_group("\\tLB:", $rg_lb) - #set $rg_string += $format_read_group("\\tCN:", $rg_cn) - #set $rg_string += $format_read_group("\\tDS:", $rg_ds) - #set $rg_string += $format_read_group("\\tDT:", $rg_dt) - #set $rg_string += $format_read_group("\\tFO:", $rg_fo) - #set $rg_string += $format_read_group("\\tKS:", $rg_ks) - #set $rg_string += $format_read_group("\\tPG:", $rg_pg) - #set $rg_string += $format_read_group("\\tPI:", $rg_pi) - #set $rg_string += $format_read_group("\\tPU:", $rg_pu) - </token> + <token name="@set_rg_string@"> + #set $rg_string = "@RG\\tID:" + str($rg_id) + #set $rg_string += $format_read_group("\\tSM:", $rg_sm) + #set $rg_string += $format_read_group("\\tPL:", $rg_pl) + #set $rg_string += $format_read_group("\\tLB:", $rg_lb) + #set $rg_string += $format_read_group("\\tCN:", $rg_cn) + #set $rg_string += $format_read_group("\\tDS:", $rg_ds) + #set $rg_string += $format_read_group("\\tDT:", $rg_dt) + #set $rg_string += $format_read_group("\\tFO:", $rg_fo) + #set $rg_string += $format_read_group("\\tKS:", $rg_ks) + #set $rg_string += $format_read_group("\\tPG:", $rg_pg) + #set $rg_string += $format_read_group("\\tPI:", $rg_pi) + #set $rg_string += $format_read_group("\\tPU:", $rg_pu) + </token> - <token name="@set_reference_fasta_filename@"><![CDATA[ + <token name="@set_reference_fasta_filename@"><![CDATA[ #if str( $reference_source.reference_source_selector ) == "history": #set $reference_fasta_filename = "localref.fa" - ln -s 
"${reference_source.ref_file}" "${reference_fasta_filename}" && + ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' && bwa index #if str($reference_source.index_a) != 'auto' -a ${reference_source.index_a} #end if - "${reference_fasta_filename}" && + '${reference_fasta_filename}' && #else: #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) #end if ]]></token> - <xml name="requirements"> - <requirements> - <requirement type="package" version="@VERSION@">bwa</requirement> - <requirement type="package" version="1.5">samtools</requirement> - </requirements> - </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">bwa</requirement> + <requirement type="package" version="1.6">samtools</requirement> + </requirements> + </xml> - <xml name="stdio"> - <stdio> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> - </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + </xml> - <macro name="reference_source_conditional"> - <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. 
See `Indexes` section of help below"> - <option value="cached">Use a built-in genome index</option> - <option value="history">Use a genome from history and build index</option> - </param> - <when value="cached"> - <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> - <options from_data_table="bwa_mem_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> - </param> - </when> - <when value="history"> - <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> - <param name="index_a" type="select" label="Algorithm for constructing the BWT index" help="(-a)"> - <option value="auto">Auto. Let BWA decide the best algorithm to use</option> - <option value="is">IS linear-time algorithm for constructing suffix array. It requires 5.37N memory where N is the size of the database. IS is moderately fast, but does not work with database larger than 2GB</option> - <option value="bwtsw">BWT-SW algorithm. This method works also with big genomes</option> - </param> - </when> - </conditional> - </macro> + <macro name="reference_source_conditional"> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. 
See `Indexes` section of help below"> + <option value="cached">Use a built-in genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> + <options from_data_table="bwa_mem_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> + <param name="index_a" type="select" label="Algorithm for constructing the BWT index" help="(-a)"> + <option value="auto">Auto. Let BWA decide the best algorithm to use</option> + <option value="is">IS linear-time algorithm for constructing suffix array. It requires 5.37N memory where N is the size of the database. IS is moderately fast, but does not work with database larger than 2GB</option> + <option value="bwtsw">BWT-SW algorithm. 
This method works also with big genomes</option> + </param> + </when> + </conditional> + </macro> - <macro name="dbKeyActionsBwa"> - <expand macro="dbKeyActions"> - <option type="from_data_table" name="bwa_indexes" column="1" offset="0"> - <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> - <filter type="param_value" ref="reference_source.ref_file" column="0"/> - </option> - </expand> - </macro> + <macro name="dbKeyActionsBwa"> + <expand macro="dbKeyActions"> + <option type="from_data_table" name="bwa_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="reference_source.ref_file" column="0"/> + </option> + </expand> + </macro> - <macro name="dbKeyActionsBwaMem"> - <expand macro="dbKeyActions"> - <option type="from_data_table" name="bwa_mem_indexes" column="1" offset="0"> - <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> - <filter type="param_value" ref="reference_source.ref_file" column="0"/> - </option> - </expand> - </macro> + <macro name="dbKeyActionsBwaMem"> + <expand macro="dbKeyActions"> + <option type="from_data_table" name="bwa_mem_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="reference_source.ref_file" column="0"/> + </option> + </expand> + </macro> - <macro name="dbKeyActions"> - <actions> - <conditional name="reference_source.reference_source_selector"> - <when value="cached"> - <action type="metadata" name="dbkey"> - <yield /> - </action> - </when> - <when value="history"> - <action type="metadata" name="dbkey"> - <option type="from_param" name="reference_source.ref_file" param_attribute="dbkey" /> - </action> - </when> - </conditional> - </actions> - </macro> + <macro name="dbKeyActions"> + <actions> + <conditional name="reference_source.reference_source_selector"> + <when 
value="cached"> + <action type="metadata" name="dbkey"> + <yield /> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="reference_source.ref_file" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </macro> - <token name="@RG@"> + <token name="@RG@"> ----- .. class:: warningmark @@ -160,8 +160,8 @@ @RG ID:FLOWCELL2.LANE4 PL:illumina LB:LIB-KID-2 SM:KID PI:400 Note the hierarchical relationship between read groups (unique for each lane) to libraries (sequenced on two lanes) and samples (across four lanes, two lanes for each library). - </token> - <token name="@info@"> + </token> + <token name="@info@"> ----- .. class:: infomark @@ -175,16 +175,5 @@ 3. https://github.com/lh3/bwa 4. http://bio-bwa.sourceforge.net/ - </token> - - <token name="@dataset_collections@"> ------- - -**Dataset collections - processing large numbers of datasets at once** - -Dataset collections are in beta-testing. Extensive documentation will be added later this Spring. - - - </token> - + </token> </macros>
