comparison test4.xml @ 6:ad564f5bb873 draft

Uploaded
author david-hoover
date Thu, 19 Jun 2014 15:27:00 -0400
parents
children cd6e7d53ec4a
comparison
equal deleted inserted replaced
5:6f35c495deb9 6:ad564f5bb873
1 <tool id="test2" name="Stupid Test 2" version="1.2.3">
2 <requirements>
3 <requirement type="set_environment" name="for_garbage">PATH</requirement>
4 <requirement type="set_environment" name="for_garbage">LD_LIBRARY_PATH</requirement>
5 </requirements>
6 <description></description>
7 <parallelism method="basic"></parallelism>
8 <command interpreter="sh">
9 bwa_wrapper.sh
10 --threads="4"
11 ## pass --illumina1.3 when the input's datatype is fastqillumina
12 #if $input1.ext == "fastqillumina":
13 --illumina1.3
14 #end if
15
16 ## reference source: a FASTA from the history or a precomputed (built-in) index
17 --fileSource=$genomeSource.refGenomeSource
18 #if $genomeSource.refGenomeSource == "history":
19 ## build index on the fly from the history FASTA
20 --ref="${genomeSource.ownFile}"
21 --dbkey=$dbkey
22 #else:
23 ## use precomputed indexes (path comes from the selected index entry)
24 --ref="${genomeSource.indices.fields.path}"
25 --do_not_build_index
26 #end if
27
28 ## input file(s); --input2 is only passed for paired-end libraries
29 --input1=$paired.input1
30 #if $paired.sPaired == "paired":
31 --input2=$paired.input2
32 #end if
33
34 ## output file (SAM)
35 --output=$output
36
37 ## run parameters; the individual BWA options are only passed when source_select is not "pre_set"
38 --genAlignType=$paired.sPaired
39 --params=$params.source_select
40 #if $params.source_select != "pre_set":
41 --maxEditDist=$params.maxEditDist
42 --fracMissingAligns=$params.fracMissingAligns
43 --maxGapOpens=$params.maxGapOpens
44 --maxGapExtens=$params.maxGapExtens
45 --disallowLongDel=$params.disallowLongDel
46 --disallowIndel=$params.disallowIndel
47 --seed=$params.seed
48 --maxEditDistSeed=$params.maxEditDistSeed
49 --mismatchPenalty=$params.mismatchPenalty
50 --gapOpenPenalty=$params.gapOpenPenalty
51 --gapExtensPenalty=$params.gapExtensPenalty
52 --suboptAlign="${params.suboptAlign}"
53 --noIterSearch=$params.noIterSearch
54 --outputTopN=$params.outputTopN
55 --outputTopNDisc=$params.outputTopNDisc
56 --maxInsertSize=$params.maxInsertSize
57 --maxOccurPairing=$params.maxOccurPairing
58 #if $params.readGroup.specReadGroup == "yes":
59 --rgid="$params.readGroup.rgid"
60 --rgcn="$params.readGroup.rgcn"
61 --rgds="$params.readGroup.rgds"
62 --rgdt="$params.readGroup.rgdt"
63 --rgfo="$params.readGroup.rgfo"
64 --rgks="$params.readGroup.rgks"
65 --rglb="$params.readGroup.rglb"
66 --rgpg="$params.readGroup.rgpg"
67 --rgpi="$params.readGroup.rgpi"
68 --rgpl="$params.readGroup.rgpl"
69 --rgpu="$params.readGroup.rgpu"
70 --rgsm="$params.readGroup.rgsm"
71 #end if
72 #end if
73
74 ## suppress output SAM header
75 --suppressHeader=$suppressHeader
76 </command>
77 <inputs>
78 <conditional name="genomeSource"> <!-- where the reference comes from: a built-in index or a FASTA in the history -->
79 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
80 <option value="indexed">Use a built-in index</option>
81 <option value="history">Use one from the history</option>
82 </param>
83 <when value="indexed">
84 <param name="indices" type="select" label="Select a reference genome">
85 <options from_data_table="bwa_indexes"> <!-- choices are read from the bwa_indexes data table -->
86 <filter type="sort_by" column="2" /> <!-- order the choices by column 2 of the table -->
87 <validator type="no_options" message="No indexes are available" />
88 </options>
89 </param>
90 </when>
91 <when value="history">
92 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> <!-- FASTA dataset used as the reference; the command template passes it as the ref option without do_not_build_index -->
93 </when>
94 </conditional>
95 <conditional name="paired"> <!-- single-end takes one FASTQ (input1); paired-end takes a forward (input1) and a reverse (input2) FASTQ -->
96 <param name="sPaired" type="select" label="Is this library mate-paired?">
97 <option value="single">Single-end</option>
98 <option value="paired">Paired-end</option>
99 </param>
100 <when value="single">
101 <param name="input1" type="data" format="fastqsanger,fastqillumina" label="FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
102 </when>
103 <when value="paired">
104 <param name="input1" type="data" format="fastqsanger,fastqillumina" label="Forward FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
105 <param name="input2" type="data" format="fastqsanger,fastqillumina" label="Reverse FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
106 </when>
107 </conditional>
108 <conditional name="params"> <!-- "pre_set" exposes no options; "full" exposes every BWA option listed below -->
109 <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
110 <option value="pre_set">Commonly Used</option>
111 <option value="full">Full Parameter List</option>
112 </param>
113 <when value="pre_set" />
114 <when value="full">
115 <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
116 <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
117 <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
118 <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
119 <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
120 <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
121 <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
122 <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
123 <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
124 <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
125 <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
126 <param name="suboptAlign" type="integer" optional="True" label="Proceed with suboptimal alignments if there are no more than INT equally best hits. (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
127 <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
128 <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
129 <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
130 <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
131 <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
132 <conditional name="readGroup"> <!-- read group (@RG) fields; they are only offered when specReadGroup is "yes" -->
133 <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
134 <option value="yes">Yes</option>
135 <option value="no" selected="True">No</option>
136 </param>
137 <when value="yes">
138 <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG
139 tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group
140 IDs may be modified when merging SAM files in order to handle collisions." />
141 <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
142 <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
143 <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
144 <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each
145 flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by
146 various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
147 <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
148 <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
149 <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
150 <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
151 <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA,
152 SOLID, HELICOS, IONTORRENT and PACBIO" />
153 <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
154 <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
155 </when>
156 <when value="no" />
157 </conditional>
158 </when>
159 </conditional>
160 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
161 </inputs>
162 <outputs>
163 <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
164 <actions> <!-- set the output's dbkey metadata according to where the reference came from -->
165 <conditional name="genomeSource.refGenomeSource">
166 <when value="indexed">
167 <action type="metadata" name="dbkey">
168 <option type="from_data_table" name="bwa_indexes" column="1"> <!-- value is taken from column 1 of the bwa_indexes data table -->
169 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- drop rows whose column 0 starts with "#" -->
170 <filter type="param_value" ref="genomeSource.indices" column="0"/> <!-- filter rows on column 0 using the selected index value (presumably an equality match; no compare attribute is given) -->
171 </option>
172 </action>
173 </when>
174 <when value="history">
175 <action type="metadata" name="dbkey">
176 <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" /> <!-- take the dbkey attribute of the history reference file -->
177 </action>
178 </when>
179 </conditional>
180 </actions>
181 </data>
182 </outputs>
183 <tests>
184 <test>
185 <!--
186 Single-end run against the built-in phiX index with the commonly used (pre_set) settings. BWA commands:
187 bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sai
188 bwa samse phiX.fasta bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.u.sam
189 phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
190 remove the comment lines (beginning with '@') from the resulting sam file
191 plain old sort doesn't handle underscores like python (the one-liner below relies on the Python 2 file() builtin):
192 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out1.u.sam bwa_wrapper_out1.sam
193 -->
194 <param name="refGenomeSource" value="indexed" />
195 <param name="indices" value="phiX" />
196 <param name="sPaired" value="single" />
197 <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
198 <param name="source_select" value="pre_set" />
199 <param name="suppressHeader" value="true" />
200 <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
201 </test>
202 <test>
203 <!--
204 Single-end run against a phiX FASTA taken from the history with the full parameter list. BWA commands:
205 cp test-data/phiX.fasta phiX.fasta
206 bwa index -a is phiX.fasta
207 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.sai
208 bwa samse -n 3 phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.u.sam
209 phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
210 remove the comment lines (beginning with '@') from the resulting sam file
211 plain old sort doesn't handle underscores like python (the one-liner below relies on the Python 2 file() builtin):
212 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out2.u.sam bwa_wrapper_out2.sam
213 -->
214 <param name="refGenomeSource" value="history" />
215 <param name="ownFile" value="phiX.fasta" />
216 <param name="sPaired" value="single" />
217 <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
218 <param name="source_select" value="full" />
219 <param name="maxEditDist" value="0" />
220 <param name="fracMissingAligns" value="0.04" />
221 <param name="maxGapOpens" value="1" />
222 <param name="maxGapExtens" value="-1" />
223 <param name="disallowLongDel" value="16" />
224 <param name="disallowIndel" value="5" />
225 <param name="seed" value="-1" />
226 <param name="maxEditDistSeed" value="2" />
227 <param name="mismatchPenalty" value="3" />
228 <param name="gapOpenPenalty" value="11" />
229 <param name="gapExtensPenalty" value="4" />
230 <param name="suboptAlign" value="" />
231 <param name="noIterSearch" value="true" />
232 <param name="outputTopN" value="3" />
233 <param name="outputTopNDisc" value="10" />
234 <param name="maxInsertSize" value="500" />
235 <param name="maxOccurPairing" value="100000" />
236 <param name="specReadGroup" value="no" />
237 <param name="suppressHeader" value="true" />
238 <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
239 </test>
240 <test>
241 <!--
242 Paired-end run against the built-in phiX index with the full parameter list and a read group; the SAM header is kept (suppressHeader is false). BWA commands:
243 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out3a.sai
244 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3b.sai
245 bwa sampe -a 500 -o 100000 -n 3 -N 10 -r "@RG\tID:abcdefg\tDS:descrip\tDT:2010-11-01\tLB:lib-mom-A\tPI:400\tPL:ILLUMINA\tSM:mom" phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3.u.sam
246 phiX.fasta is the prefix for the reference
247 plain old sort doesn't handle underscores like python (the one-liner below relies on the Python 2 file() builtin):
248 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out3.u.sam bwa_wrapper_out3.sam
249 -->
250 <param name="refGenomeSource" value="indexed" />
251 <param name="indices" value="phiX" />
252 <param name="sPaired" value="paired" />
253 <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
254 <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
255 <param name="source_select" value="full" />
256 <param name="maxEditDist" value="0" />
257 <param name="fracMissingAligns" value="0.04" />
258 <param name="maxGapOpens" value="1" />
259 <param name="maxGapExtens" value="-1" />
260 <param name="disallowLongDel" value="16" />
261 <param name="disallowIndel" value="5" />
262 <param name="seed" value="-1" />
263 <param name="maxEditDistSeed" value="2" />
264 <param name="mismatchPenalty" value="3" />
265 <param name="gapOpenPenalty" value="11" />
266 <param name="gapExtensPenalty" value="4" />
267 <param name="suboptAlign" value="" />
268 <param name="noIterSearch" value="true" />
269 <param name="outputTopN" value="3" />
270 <param name="outputTopNDisc" value="10" />
271 <param name="maxInsertSize" value="500" />
272 <param name="maxOccurPairing" value="100000" />
273 <param name="specReadGroup" value="yes" />
274 <param name="rgid" value="abcdefg" />
275 <param name="rgcn" value="" />
276 <param name="rgds" value="descrip" />
277 <param name="rgdt" value="2010-11-01" />
278 <param name="rgfo" value="" />
279 <param name="rgks" value="" />
280 <param name="rglb" value="lib-mom-A" />
281 <param name="rgpg" value="" />
282 <param name="rgpi" value="400" />
283 <param name="rgpl" value="ILLUMINA" />
284 <param name="rgpu" value="" />
285 <param name="rgsm" value="mom" />
286 <param name="suppressHeader" value="false" />
287 <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" lines_diff="2" />
288 </test>
289 <test>
290 <!--
291 Paired-end run against a phiX FASTA taken from the history with the commonly used (pre_set) settings. BWA commands:
292 cp test-data/phiX.fasta phiX.fasta
293 bwa index -a is phiX.fasta
294 bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out8a.sai
295 bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8b.sai
296 bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out8a.sai bwa_wrapper_out8b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8.u.sam
297 phiX.fasta is the prefix for the reference
298 remove the comment lines (beginning with '@') from the resulting sam file
299 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out8.u.sam bwa_wrapper_out8.sam
300 -->
301 <param name="refGenomeSource" value="history" />
302 <!-- reference FASTA supplied from the history (phiX.fasta); the command template has the wrapper index it on the fly -->
303 <param name="ownFile" value="phiX.fasta" />
304 <param name="sPaired" value="paired" />
305 <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
306 <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
307 <param name="source_select" value="pre_set" />
308 <param name="suppressHeader" value="true" />
309 <output name="output" file="bwa_wrapper_out8.sam" ftype="sam" sort="True" />
310 </test>
311 </tests>
312 <help>
313
314 **What it does**
315
316 BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a sequence database (large), such as the human reference genome. It is developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
317
318 ------
319
320 **Know what you are doing**
321
322 .. class:: warningmark
323
324 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
325
326 .. __: http://bio-bwa.sourceforge.net/
327
328 ------
329
330 **Input formats**
331
332 BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.
333
334 ------
335
336 **A Note on Built-in Reference Genomes**
337
338 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
339
340 ------
341
342 **Outputs**
343
344 The output is in SAM format, and has the following columns::
345
346 Column Description
347 -------- --------------------------------------------------------
348 1 QNAME Query (pair) NAME
349 2 FLAG bitwise FLAG
350 3 RNAME Reference sequence NAME
351 4 POS 1-based leftmost POSition/coordinate of clipped sequence
352 5 MAPQ MAPping Quality (Phred-scaled)
353 6 CIGAR extended CIGAR string
354 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
355 8 MPOS 1-based Mate POSition
356 9 ISIZE Inferred insert SIZE
357 10 SEQ query SEQuence on the same strand as the reference
358 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
359 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
360
361 The flags are as follows::
362
363 Flag Description
364 ------ -------------------------------------
365 0x0001 the read is paired in sequencing
366 0x0002 the read is mapped in a proper pair
367 0x0004 the query sequence itself is unmapped
368 0x0008 the mate is unmapped
369 0x0010 strand of the query (1 for reverse)
370 0x0020 strand of the mate
371 0x0040 the read is the first read in a pair
372 0x0080 the read is the second read in a pair
373 0x0100 the alignment is not primary
374
375 It looks like this (scroll sideways to see the entire example)::
376
377 QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
378 HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
379 HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
380
381 -------
382
383 **BWA settings**
384
385 All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
386
387 ------
388
389 **BWA parameter list**
390
391 This is an exhaustive list of BWA options:
392
393 For **aln**::
394
395 -n NUM Maximum edit distance if the value is INT, or the fraction of missing
396 alignments given 2% uniform base error rate if FLOAT. In the latter
397 case, the maximum edit distance is automatically chosen for different
398 read lengths. [0.04]
399 -o INT Maximum number of gap opens [1]
400 -e INT Maximum number of gap extensions, -1 for k-difference mode
401 (disallowing long gaps) [-1]
402 -d INT Disallow a long deletion within INT bp towards the 3'-end [16]
403 -i INT Disallow an indel within INT bp towards the ends [5]
404 -l INT Take the first INT subsequence as seed. If INT is larger than the
405 query sequence, seeding will be disabled. For long reads, this option
406 is typically ranged from 25 to 35 for '-k 2'. [inf]
407 -k INT Maximum edit distance in the seed [2]
408 -t INT Number of threads (multi-threading mode) [1]
409 -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score
410 lower than (bestScore-misMsc). [3]
411 -O INT Gap open penalty [11]
412 -E INT Gap extension penalty [4]
413 -c Reverse query but not complement it, which is required for alignment
414 in the color space.
415 -R Proceed with suboptimal alignments even if the top hit is a repeat. By
416 default, BWA only searches for suboptimal alignments if the top hit is
417 unique. Using this option has no effect on accuracy for single-end
418 reads. It is mainly designed for improving the alignment accuracy of
419 paired-end reads. However, the pairing procedure will be slowed down,
420 especially for very short reads (~32bp).
421 -N Disable iterative search. All hits with no more than maxDiff
422 differences will be found. This mode is much slower than the default.
423
424 For **samse**::
425
426 -n INT Maximum number of alignments to output in the XA tag for reads paired
427 properly. If a read has more than INT hits, the XA tag will not be
428 written. [3]
429 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
430
431 For **sampe**::
432
433 -a INT Maximum insert size for a read pair to be considered as being mapped
434 properly. Since version 0.4.5, this option is only used when there
435 are not enough good alignments to infer the distribution of insert
436 sizes. [500]
437 -n INT Maximum number of alignments to output in the XA tag for reads paired
438 properly. If a read has more than INT hits, the XA tag will not be
439 written. [3]
440 -N INT Maximum number of alignments to output in the XA tag for disconcordant
441 read pairs (excluding singletons). If a read has more than INT hits,
442 the XA tag will not be written. [10]
443 -o INT Maximum occurrences of a read for pairing. A read with more
444 occurrences will be treated as a single-end read. Reducing this
445 parameter helps faster pairing. [100000]
446 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
447
448 For specifying the read group in **samse** or **sampe**, use the following::
449
450 @RG Read group. Unordered multiple @RG lines are allowed.
451 ID Read group identifier. Each @RG line must have a unique ID. The value of
452 ID is used in the RG tags of alignment records. Must be unique among all
453 read groups in header section. Read group IDs may be modified when
454 merging SAM files in order to handle collisions.
455 CN Name of sequencing center producing the read.
456 DS Description.
457 DT Date the run was produced (ISO8601 date or date/time).
458 FO Flow order. The array of nucleotide bases that correspond to the
459 nucleotides used for each flow of each read. Multi-base flows are encoded
460 in IUPAC format, and non-nucleotide flows by various other characters.
461 Format : /\*|[ACMGRSVTWYHKDBN]+/
462 KS The array of nucleotide bases that correspond to the key sequence of each read.
463 LB Library.
464 PG Programs used for processing the read group.
465 PI Predicted median insert size.
466 PL Platform/technology used to produce the reads. Valid values : CAPILLARY,
467 LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO.
468 PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
469 SOLiD). Unique identifier.
470 SM Sample. Use pool name where a pool is being sequenced.
471
472 </help>
473 </tool>
474
475