comparison bwa.xml @ 0:c9c78719ef26 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bwa commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author devteam
date Mon, 09 Nov 2015 11:19:02 -0500
parents
children 8955a9521def
comparison
equal deleted inserted replaced
-1:000000000000 0:c9c78719ef26
1 <?xml version="1.0"?>
2 <tool id="bwa" name="Map with BWA" version="0.4.1">
3 <description>- map short reads (&lt; 100 bp) against reference genome</description>
4 <macros>
5 <import>read_group_macros.xml</import>
6 <import>bwa_macros.xml</import>
7 <token name="@command_options@">
8 #if str( $analysis_type.analysis_type_selector ) == "full":
9 -n ${analysis_type.n}
10 -o ${analysis_type.o}
11 -e ${analysis_type.e}
12 -i ${analysis_type.i}
13 -d ${analysis_type.d}
14 -l ${analysis_type.l}
15 -k ${analysis_type.k}
16 -m ${analysis_type.m}
17 -M ${analysis_type.M}
18 -O ${analysis_type.O}
19 -E ${analysis_type.E}
20 -R ${analysis_type.R}
21 -q ${analysis_type.q}
22 ## -B and -L are declared optional in the inputs: each flag is emitted only when its value renders as a non-empty string
23 #if str( $analysis_type.B ):
24 -B ${analysis_type.B}
25 #end if
26
27 #if str( $analysis_type.L ):
28 -L ${analysis_type.L}
29 #end if
30 #end if
31 </token>
32 <token name="@read_group_options@">
33 #if $use_rg:
34 @set_rg_string@
35 -r '$rg_string'
36 #end if
37 </token>
38
39 <xml name="advanced_pe_options">
40 <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" help="Provides additional controls">
41 <option value="set">Set</option>
42 <option value="do_not_set" selected="True">Do not set</option>
43 </param>
44 <when value="set">
45 <param name="a" type="integer" value="500" label="Maximum insert size for a read pair to be considered being mapped properly." help="sampe -a; This option is only used when there are not enough good alignments to infer the distribution of insert sizes; default=500"/>
46 <param name="o" type="integer" value="100000" label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/>
47 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly." help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
48 <param name="N" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for discordant read pairs (excluding singletons)." help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/>
49 <param name="c" type="float" value="0.00001" label="Prior of chimeric rate (lower bound)" help="sampe -c; default=0.00001"/>
50 </when>
51 <when value="do_not_set">
52 <!-- no extra parameters: the command section emits no sampe flags for this choice -->
53 </when>
54 </xml>
55 <xml name="advanced_se_options">
56 <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" help="Provides additional controls">
57 <option value="set">Set</option>
58 <option value="do_not_set" selected="True">Do not set</option>
59 </param>
60 <when value="set">
61 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
62 </when>
63 <when value="do_not_set">
64 <!-- no extra parameters are offered for this choice -->
65 </when>
66 </xml>
67 </macros>
68 <expand macro="requirements" />
69 <expand macro="stdio" />
70 <command>
71 #set $reference_fasta_filename = "localref.fa"
72
73 #if str( $reference_source.reference_source_selector ) == "history":
74 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
75
76 ## The following shell commands decide which of the BWA indexing algorithms (IS or BWTSW) will be run
77 ## depending on the size of the input FASTA dataset; stat -L follows the symlink created above so the FASTA itself is measured
78 (
79 size=`stat -L -c %s "${reference_fasta_filename}" 2&gt;/dev/null`; ## Linux
80 if [ $? -eq 0 ];
81 then
82 if [ "\$size" -lt 2000000000 ];
83 then
84 bwa index -a is "${reference_fasta_filename}";
85 else
86 bwa index -a bwtsw "${reference_fasta_filename}";
87 fi;
88 fi;
89
90 eval \$(stat -L -s "${reference_fasta_filename}" 2&gt;/dev/null); ## OSX
91 if [ -n "\$st_size" ];
92 then
93 if [ "\$st_size" -lt 2000000000 ];
94 then
95 bwa index -a is "${reference_fasta_filename}";
96 echo "Generating BWA index with is algorithm";
97 else
98 bwa index -a bwtsw "${reference_fasta_filename}";
99 echo "Generating BWA index with bwtsw algorithm";
100 fi;
101 fi;
102 ) &amp;&amp;
103 #else:
104 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
105 #end if
106
107 ## setup vars for rg handling...
108 @define_read_group_helpers@
109 #if str( $input_type.input_type_selector ) == "paired":
110 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2)
111 #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]:
112 #set $rg_auto_name = $read_group_name_default($input_type.bam_input)
113 #else
114 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1)
115 #end if
116 @set_use_rg_var@
117 @set_read_group_vars@
118
119 ## Begin bwa command line
120
121 ####### Fastq paired
122
123 #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection":
124 bwa aln
125 -t "\${GALAXY_SLOTS:-1}"
126
127 @command_options@
128
129 "${reference_fasta_filename}"
130
131 #if str( $input_type.input_type_selector ) == "paired_collection":
132 "${input_type.fastq_input1.forward}"
133 #else
134 "${input_type.fastq_input1}"
135 #end if
136
137 > first.sai &amp;&amp;
138
139 bwa aln
140 -t "\${GALAXY_SLOTS:-1}"
141
142 @command_options@
143
144 "${reference_fasta_filename}"
145
146 #if str( $input_type.input_type_selector ) == "paired_collection":
147 "${input_type.fastq_input1.reverse}"
148 #else
149 "${input_type.fastq_input2}"
150 #end if
151
152 > second.sai &amp;&amp;
153
154 bwa sampe
155 ## The selector's values are "set" / "do_not_set" (advanced_pe_options macro), so compare against "set"
156 #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "set":
157 -a ${input_type.adv_pe_options.a}
158 -o ${input_type.adv_pe_options.o}
159 -n ${input_type.adv_pe_options.n}
160 -N ${input_type.adv_pe_options.N}
161 #end if
162
163 @read_group_options@
164
165 #if str( $input_type.input_type_selector ) == "paired_collection":
166 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1.forward}" "${input_type.fastq_input1.reverse}"
167 #else:
168 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1}" "${input_type.fastq_input2}"
169 #end if
170
171 ####### Fastq single
172
173 #elif str( $input_type.input_type_selector ) == "single":
174 bwa aln
175 -t "\${GALAXY_SLOTS:-1}"
176
177 @command_options@
178
179 "${reference_fasta_filename}"
180 "${input_type.fastq_input1}"
181 > first.sai &amp;&amp;
182
183 bwa samse
184 ## The selector's values are "set" / "do_not_set" (advanced_se_options macro), so compare against "set"
185 #if str( $input_type.adv_se_options.adv_se_options_selector) == "set":
186 -n ${input_type.adv_se_options.n}
187 #end if
188
189 @read_group_options@
190
191 "${reference_fasta_filename}" first.sai "${input_type.fastq_input1}"
192
193 ####### BAM paired
194
195 #elif str( $input_type.input_type_selector ) == "paired_bam":
196 bwa aln
197 -t "\${GALAXY_SLOTS:-1}"
198 -b
199 -1
200
201 @command_options@
202
203 "${reference_fasta_filename}"
204 "${input_type.bam_input}"
205 > first.sai &amp;&amp;
206
207 bwa aln
208 -t "\${GALAXY_SLOTS:-1}"
209 -b
210 -2
211 @command_options@
212 "${reference_fasta_filename}"
213 "${input_type.bam_input}"
214 > second.sai &amp;&amp;
215
216 bwa sampe
217 ## The selector's values are "set" / "do_not_set" (advanced_pe_options macro), so compare against "set"
218 #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "set":
219 -a ${input_type.adv_bam_pe_options.a}
220 -o ${input_type.adv_bam_pe_options.o}
221 -n ${input_type.adv_bam_pe_options.n}
222 -N ${input_type.adv_bam_pe_options.N}
223 #end if
224
225 @read_group_options@
226
227 "${reference_fasta_filename}" first.sai second.sai "${input_type.bam_input}" "${input_type.bam_input}"
228
229 ####### BAM single
230
231 #elif str( $input_type.input_type_selector ) == "single_bam":
232 bwa aln
233 -t "\${GALAXY_SLOTS:-1}"
234 -b
235 -0
236
237 @command_options@
238
239 "${reference_fasta_filename}"
240 "${input_type.bam_input}"
241 > first.sai &amp;&amp;
242
243 bwa samse
244 ## The selector's values are "set" / "do_not_set" (advanced_se_options macro), so compare against "set"
245 #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "set":
246 -n ${input_type.adv_bam_se_options.n}
247 #end if
248
249 @read_group_options@
250
251 "${reference_fasta_filename}" first.sai "${input_type.bam_input}"
252 #end if
253
254 | samtools view -Sb - > temporary_bam_file.bam &amp;&amp;
255 ## The output path is quoted like every other path in this command
256 samtools sort -f temporary_bam_file.bam "${bam_output}"
257 </command>
258
259 <inputs>
260
261 <conditional name="reference_source">
262 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
263 <option value="cached">Use a built-in genome index</option>
264 <option value="history">Use a genome from history and build index</option>
265 </param>
266 <when value="cached">
267 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
268 <options from_data_table="bwa_mem_indexes">
269 <filter type="sort_by" column="2" />
270 <validator type="no_options" message="No indexes are available" />
271 </options>
272 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
273 </param>
274 </when>
275 <when value="history">
276 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
277 </when>
278 </conditional>
279 <conditional name="input_type">
280 <param name="input_type_selector" type="select" label="Select input type" help="Select between fastq and bam datasets and between paired and single end data">
281 <option value="paired">Paired fastq</option>
282 <option value="paired_collection">Paired fastq collection</option>
283 <option value="single">Single fastq</option>
284 <option value="paired_bam">Paired BAM</option>
285 <option value="single_bam">Single BAM</option>
286 </param>
287 <when value="paired">
288 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
289 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
290 <conditional name="adv_pe_options">
291
292 <expand macro="advanced_pe_options" />
293
294 </conditional>
295 </when>
296
297 <when value="paired_collection">
298 <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
299 <conditional name="adv_pe_options">
300
301 <expand macro="advanced_pe_options" />
302
303 </conditional>
304 </when>
305
306 <when value="single">
307 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/>
308 <conditional name="adv_se_options">
309
310 <expand macro="advanced_se_options" />
311
312 </conditional>
313 </when>
314
315 <!-- the difference between single and paired BAMs is in the <command> tag portion and is related to the -0, -1, and -2 options -->
316
317 <when value="paired_bam">
318 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with paired reads"/>
319 <conditional name="adv_bam_pe_options">
320
321 <expand macro="advanced_pe_options" />
322
323 </conditional>
324 </when>
325
326 <when value="single_bam">
327 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with single reads"/>
328 <conditional name="adv_bam_se_options">
329
330 <expand macro="advanced_se_options" />
331
332 </conditional>
333 </when>
334
335 </conditional>
336
337 <expand macro="read_group_conditional" />
338
339 <conditional name="analysis_type">
340 <param name="analysis_type_selector" type="select" label="Select analysis mode">
341 <option value="illumina">1.Simple Illumina mode</option>
342 <option value="full">2.Full list of options</option>
343 </param>
344 <when value="illumina">
345 <!-- no extra parameters: @command_options@ emits aln flags only for the "full" mode -->
346 </when>
347 <when value="full">
348 <param name="n" type="text" value="0.04" label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." help="aln -n; default=0.04"/>
349 <param name="o" type="integer" value="1" label="maximum number of gap openings" help="aln -o; default=1"/>
350 <param name="e" type="integer" value="-1" label="maximum number of gap extensions" help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/>
351 <param name="i" type="integer" value="5" label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/>
352 <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" help="aln -d; default=10"/>
353 <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/>
354 <param name="k" type="integer" value="2" label="maximum differences in the seed" help="aln -k; default=2"/>
355 <param name="m" type="integer" value="2000000" label="maximum entries in the queue" help="aln -m; default=2000000"/>
356 <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/>
357 <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/>
358 <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/>
359 <param name="R" type="integer" value="30" label="stop searching when there are more than this value of equally best hits" help="aln -R; default=30"/>
360 <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" help="aln -q; default=0"/>
361 <param name="B" type="integer" optional="True" label="length of barcode" help="aln -B; optional parameter"/>
362 <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" help="aln -L; optional parameter"/>
363 </when>
364 </conditional>
365 </inputs>
366
367 <outputs>
368 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)">
369 <expand macro="dbKeyActionsBwa" />
370 </data>
371 </outputs>
372
373 <tests>
374 <test>
375 <param name="reference_source_selector" value="history" />
376 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
377 <param name="input_type_selector" value="paired"/>
378 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
379 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
380 <param name="analysis_type_selector" value="illumina"/>
381 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" />
382 </test>
383 <test>
384 <param name="reference_source_selector" value="history" />
385 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
386 <param name="input_type_selector" value="paired_bam"/>
387 <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/>
388 <param name="analysis_type_selector" value="illumina"/>
389 <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2" />
390 </test>
391 <test>
392 <param name="reference_source_selector" value="history" />
393 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
394 <param name="input_type_selector" value="paired"/>
395 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
396 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
397 <param name="rg_selector" value="set"/>
398 <param name="ID" value="rg1"/>
399 <param name="PL" value="CAPILLARY"/>
400 <param name="analysis_type_selector" value="illumina"/>
401 <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2" />
402 </test>
403 </tests>
404 <help>
405 **What it does**
406
407 BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as a separate Galaxy tool.
408
409 This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool:
410
411 - **bwa aln** - actual mapper placing reads onto the reference sequence
412 - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads
413 - **bwa sampe** - post-processor for paired reads
414
415 Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
416
417 -----
418
419 **Indices: Selecting reference genomes for BWA**
420
421 Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
422
423 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against.
424 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa aln`.
425
426 If your genome of interest is not listed here you have two choices:
427
428 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added
429 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history and build index** option.
430
431 -----
432
433 **Galaxy-specific option**
434
435 Galaxy allows two levels of control over bwa-aln options provided by **Select analysis mode** menu option. These are:
436
437 1. *Simple Illumina mode*: The simplest possible bwa aln application, in which single or paired-end data are aligned to the reference using default parameters. It is equivalent to running bwa aln &lt;reference index&gt; &lt;fastq dataset&gt; on each read set, followed by bwa samse (single reads) or bwa sampe (paired reads).
438 2. *Full list of options*: Allows access to all options through Galaxy interface.
439
440 ------
441
442 **bwa-aln options**
443
444 Each Galaxy parameter widget corresponds to command line flags listed below::
445
446 -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
447 -o INT maximum number or fraction of gap opens [1]
448 -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]
449 -i INT do not put an indel within INT bp towards the ends [5]
450 -d INT maximum occurrences for extending a long deletion [10]
451 -l INT seed length [32]
452 -k INT maximum differences in the seed [2]
453 -m INT maximum entries in the queue [2000000]
454 -M INT mismatch penalty [3]
455 -O INT gap open penalty [11]
456 -E INT gap extension penalty [4]
457 -R INT stop searching when there are >INT equally best hits [30]
458 -q INT quality threshold for read trimming down to 35bp [0]
459 -B INT length of barcode
460 -L log-scaled gap penalty for long deletions
461 -N non-iterative mode: search for all n-difference hits (slooow)
462 -I the input is in the Illumina 1.3+ FASTQ-like format
463 -b the input read file is in the BAM format
464 -0 use single-end reads only (effective with -b)
465 -1 use the 1st read in a pair (effective with -b)
466 -2 use the 2nd read in a pair (effective with -b)
467
468 **bwa-sampe options**::
469
470 -a INT maximum insert size [500]
471 -o INT maximum occurrences for one end [100000]
472 -n INT maximum hits to output for paired reads [3]
473 -N INT maximum hits to output for discordant pairs [10]
474 -c FLOAT prior of chimeric rate (lower bound) [1.0e-05]
475 -r STR read group header line [null]
476
477 **bwa-samse options**::
478
479 -n INT maximum hits to output for paired reads [3]
480 -r STR read group header line [null]
481
482 @dataset_collections@
483
484 @RG@
485
486 @info@
487 </help>
488 <citations>
489 <citation type="doi">10.1093/bioinformatics/btp324</citation>
490 <citation type="doi">10.1093/bioinformatics/btp698</citation>
491 </citations>
492 </tool>