comparison lastz_wrapper.xml @ 1:3c13c9c09ad9 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:23:37 -0400
parents
children
comparison
equal deleted inserted replaced
0:0bd6b2cb3b2a 1:3c13c9c09ad9
1 <tool id="lastz_wrapper_2" name="Lastz" version="1.2.2">
<!-- Runtime dependency: the "lastz" package, pinned to version 1.02.00. -->
2 <requirements>
3 <requirement type="package" version="1.02.00">lastz</requirement>
4 </requirements>
<!-- One-line description of the tool. -->
5 <description> map short reads against reference sequence</description>
<!--
Cheetah template that builds the lastz_wrapper.py command line (run with the python interpreter).
 * ref_name is passed only when seq_name.how_to_name is "yes".
 * Reference from the history: input1 is the selected dataset and ref_sequences is that
   dataset's "sequences" metadata value. input1 is declared inside the "source" conditional,
   so both references use the fully qualified source.input1 path.
 * Cached reference: input1 is the "path" field of the selected data table entry and
   ref_sequences is the literal string "None".
 * Preset mode passes only pre_set_options; full mode passes every individual alignment setting.
 * The identity, coverage, output, unmask and lastzSeqsFileDir options are always passed.
-->
6 <command interpreter="python">lastz_wrapper.py
7 #if $seq_name.how_to_name=="yes":
8 --ref_name="${seq_name.ref_name}"
9 #end if
10 --ref_source="${source.ref_source}"
11 --source_select="${params.source_select}"
12 --out_format="${out_format}"
13 --input2="${input2}"
14 #if $source.ref_source=="history":
15 --input1="${source.input1}"
16 --ref_sequences="${source.input1.metadata.sequences}"
17 #else:
18 --input1="${source.input1_2bit.fields.path}"
19 --ref_sequences="None"
20 #end if
21 #if $params.source_select=="pre_set":
22 --pre_set_options="${params.pre_set_options}"
23 #else:
24 --strand="${params.strand}"
25 --seed="${params.seed}"
26 --gfextend="${params.gfextend}"
27 --chain="${params.chain}"
28 --transition="${params.transition}"
29 --O="${params.O}"
30 --E="${params.E}"
31 --X="${params.X}"
32 --Y="${params.Y}"
33 --K="${params.K}"
34 --L="${params.L}"
35 --entropy="${params.entropy}"
36 #end if
37 --identity_min="${min_ident}"
38 --identity_max="${max_ident}"
39 --coverage="${min_cvrg}"
40 --output="${output1}"
41 --unmask="${unmask}"
42 --lastzSeqsFileDir="${GALAXY_DATA_INDEX_DIR}"
43 </command>
44 <inputs>
<!-- Reads to align: a FASTA dataset; the command template passes it as the input2 option. -->
45 <param name="input2" format="fasta" type="data" label="Align sequencing reads in" />
<!--
Where the reference comes from. The command template branches on ref_source:
 * "cached": input1_2bit is chosen from the entries of the lastz_seqs data table.
 * "history": input1 is a FASTA dataset.
-->
46 <conditional name="source">
47 <param name="ref_source" type="select" label="Against reference sequences that are">
48 <option value="cached">locally cached</option>
49 <option value="history">in your history</option>
50 </param>
51 <when value="cached">
52 <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
53 <options from_data_table="lastz_seqs" />
54 </param>
55 </when>
56 <when value="history">
57 <param name="input1" type="data" format="fasta" label="Select a reference dataset" />
58 </when>
59 </conditional>
<!--
Output format selector: sam, diffs (labelled "Polymorphisms") or tabular.
The value is passed to the wrapper as out_format; the output's change_format rule also keys on it.
-->
60 <param name="out_format" type="select" label="Output format">
61 <option value="sam">SAM</option>
62 <option value="diffs">Polymorphisms</option>
63 <option value="tabular">Tabular</option>
64 </param>
<!--
Alignment settings, selected by source_select:
 * "pre_set": a single select (pre_set_options) of named presets.
 * "full": the individual settings the command template forwards one by one
   (strand, seed, transition, gfextend, chain, O, E, X, Y, K, L, entropy).
-->
65 <conditional name="params">
66 <param name="source_select" type="select" label="Lastz settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
67 <option value="pre_set">Commonly used</option>
68 <option value="full">Full Parameter List</option>
69 </param>
70 <when value="pre_set">
71 <param name="pre_set_options" type="select" label="Select mapping mode">
72 <option value="yasra98">Roche-454 98% identity</option>
73 <option value="yasra95">Roche-454 95% identity</option>
74 <option value="yasra90">Roche-454 90% identity</option>
75 <option value="yasra85">Roche-454 85% identity</option>
76 <option value="yasra75">Roche-454 75% identity</option>
77 <option value="yasra95short">Illumina 95% identity</option>
78 <option value="yasra85short">Illumina 85% identity</option>
79 </param>
80 </when>
81 <when value="full">
82 <param name="strand" type="select" label="Which strand to search?">
83 <option value="both">Both</option>
84 <option value="plus">Search forward strand only (the one in the reference)</option>
85 <option value="minus">Search the reverse complement strand only (opposite of the reference)</option>
86 </param>
87 <param name="seed" type="select" label="Select seeding settings" help="allows you set word size and number of mismatches">
88 <option value="12of19">Seed hits require a 19 bp word with matches in 12 specific positions</option>
89 <option value="14of22">Seed hits require a 22 bp word with matches in 14 specific positions</option>
90 </param>
91 <param name="transition" type="select" label="Select transition settings" help="affects the number of allowed transition substitutions">
92 <option value="transition">Allow one transition in each seed hit</option>
93 <option value="transition=2">Allow two transitions in a seed hit </option>
94 <option value="notransition">Don't allow any transitions in seed hits</option>
95 </param>
96 <param name="gfextend" type="select" label="Perform gap-free extension of seed hits to HSPs (high scoring segment pairs)?">
97 <option value="nogfextend">No</option>
98 <option value="gfextend">Yes</option>
99 </param>
100 <param name="chain" type="select" label="Perform chaining of HSPs?">
101 <option value="nochain">No</option>
102 <option value="chain">Yes</option>
103 </param>
104 <param name="O" type="integer" value="400" label="Gap opening penalty"/>
105 <param name="E" type="integer" value="30" label="Gap extension penalty"/>
106 <param name="X" type="integer" value="910" label="X-drop threshold"/>
107 <param name="Y" type="integer" value="9370" label="Y-drop threshold"/>
108 <param name="K" type="integer" value="3000" label="Set the threshold for HSPs (ungapped extensions scoring lower are discarded)"/>
109 <param name="L" type="integer" value="3000" label="Set the threshold for gapped alignments (gapped extensions scoring lower are discarded)"/>
110 <param name="entropy" type="select" label="Involve entropy when filtering HSPs?">
111 <option value="noentropy">No</option>
112 <option value="entropy">Yes</option>
113 </param>
114 </when>
115 </conditional>
<!--
Optional renaming of the reference. ref_name exists only in the "yes" branch,
and the command template passes it only in that case; the "no" branch has no parameters.
-->
116 <conditional name="seq_name">
117 <param name="how_to_name" type="select" label="Do you want to modify the reference name?">
118 <option value="no">No</option>
119 <option value="yes">Yes</option>
120 </param>
121 <when value="yes">
122 <param name="ref_name" type="text" value="Type sequence name here" label="Enter name for the Reference sequence"/>
123 </when>
124 <when value="no" />
125 </conditional>
<!--
Percent filters forwarded to the wrapper as identity_min, identity_max and coverage.
All three are percentages, so the form constrains them to 0-100 instead of letting
out-of-range values reach lastz. The min_ident <= max_ident relation is not checked here.
-->
126 <param name="min_ident" type="integer" value="0" min="0" max="100" label="Do not report matches below this identity (%)"/>
127 <param name="max_ident" type="integer" value="100" min="0" max="100" label="Do not report matches above this identity (%)"/>
128 <param name="min_cvrg" type="integer" value="0" min="0" max="100" label="Do not report matches that cover less than this percentage of each read"/>
<!-- Yes/no switch forwarded to the wrapper as the unmask option. -->
129 <param name="unmask" type="select" label="Convert lowercase bases to uppercase">
130 <option value="yes">Yes</option>
131 <option value="no">No</option>
132 </param>
133 </inputs>
<!--
Single output dataset (output1). Its format is tabular by default and becomes sam
when out_format is "sam"; no rule is declared for the other out_format values.
-->
134 <outputs>
135 <data format="tabular" name="output1" label="${tool.name} on ${on_string}: mapped reads">
136 <change_format>
137 <when input="out_format" value="sam" format="sam" />
138 </change_format>
139 </data>
140 </outputs>
141 <tests>
<!-- Test 1: cached reference (phiX174), full parameter list, SAM output, reference renamed to "Ref". -->
142 <test>
143 <!--
144 Lastz command:
145 lastz phiX.2bit/phiX174[nickname=Ref] test-data/b1.fasta +nogfextend +nochain +gapped +strand=both +seed=12of19 +transition O=400 E=30 X=910 Y=9370 K=3000 L=3000 +noentropy +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=sam- > lastz_wrapper_out2.sam
146 You need to point to phiX.2bit somewhere on your system. b1.fasta is located in galaxy's test-data. You will have to replace all the pluses before the
147 commands with 2 dashes, as double-dash can't appear in an XML comment.
148 -->
149 <param name="input2" value="b1.fasta" ftype="fasta" />
150 <param name="ref_source" value="cached" />
151 <param name="input1_2bit" value="phiX174" />
152 <param name="out_format" value="sam" />
153 <param name="source_select" value="full" />
154 <param name="strand" value="both" />
155 <param name="seed" value="12of19" />
156 <param name="transition" value="transition" />
157 <param name="gfextend" value="nogfextend" />
158 <param name="chain" value="nochain" />
159 <param name="O" value="400" />
160 <param name="E" value="30" />
161 <param name="X" value="910" />
162 <param name="Y" value="9370" />
163 <param name="K" value="3000" />
164 <param name="L" value="3000" />
165 <param name="entropy" value="noentropy" />
166 <!--
167 how_to_name is not the default. It is changed to modify
168 input1_2bit by adding the ref_name as a nickname
169 -->
170 <param name="how_to_name" value="yes" />
171 <param name="ref_name" value="Ref" />
172 <param name="min_ident" value="0" />
173 <param name="max_ident" value="100" />
174 <param name="min_cvrg" value="0" />
175 <param name="unmask" value="yes" />
176 <output name="output1" file="lastz_wrapper_out2.sam" />
177 </test>
<!-- Test 2: reference taken from the history (phiX.fasta), yasra95short preset, diffs output. -->
178 <test>
179 <!--
180 Lastz command:
181 lastz test-data/phiX.fasta test-data/b1.fasta[fullnames] +yasra95short +ambiguousn +nolaj +identity=0..100 +coverage=0 +format=diffs > lastz_wrapper_out3.tabular
182 phiX.fasta and b1.fasta are located in galaxy's test-data. You will have to replace all the pluses before the commands with 2 dashes,
183 as double-dash can't appear in an XML comment.
184 -->
185 <param name="input2" value="b1.fasta" ftype="fasta" />
186 <param name="ref_source" value="history" />
187 <param name="input1" value="phiX.fasta" ftype="fasta" />
188 <param name="out_format" value="diffs" />
189 <param name="source_select" value="pre_set" />
190 <param name="pre_set_options" value="yasra95short" />
191 <param name="how_to_name" value="no" />
192 <param name="min_ident" value="0" />
193 <param name="max_ident" value="100" />
194 <param name="min_cvrg" value="0" />
195 <param name="unmask" value="yes" />
196 <output name="output1" file="lastz_wrapper_out3.tabular" />
197 </test>
<!-- Test 3: multi-sequence reference from the history (phiX_split.fasta), yasra95short preset, tabular output compared after sorting. -->
198 <test>
199 <!--
200 Lastz command: first you will need to split the file phiX_split.fasta into two files,
201 phiX1.fasta and phiX2.fasta, each with 1 sequence (phiX1 and phiX2, respectively). Then:
202 lastz phiX1.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general-:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle > lastz_wrapper_out4.tabular
203 lastz phiX2.fasta test-data/b1.fasta *yasra95short *ambiguousn *nolaj *identity=0..100 *coverage=0 *format=general-:score,name1,strand1,size1,start1,zstart1,end1,length1,text1,name2,strand2,size2,start2,zstart2,end2,start2+,zstart2+,end2+,length2,text2,diff,cigar,identity,coverage,gaprate,diagonal,shingle >> lastz_wrapper_out4.tabular
204 You need to point to phiX1.fasta and phiX2.fasta somewhere on your system.
205 phiX_split.fasta and b1.fasta are located in galaxy's test-data
206 You will have to replace all the asterisks before the commands with 2 dashes,
207 as double-dash can't appear in an XML comment
208
209 NOTE: since the input file includes more than 1 sequence, the output must be sorted in
210 order for the functional test to pass. This is done using the sort="True" attribute on the output.
211 -->
212 <param name="input2" value="b1.fasta" ftype="fasta" />
213 <param name="ref_source" value="history" />
214 <param name="input1" value="phiX_split.fasta" ftype="fasta" />
215 <param name="out_format" value="tabular" />
216 <param name="source_select" value="pre_set" />
217 <param name="pre_set_options" value="yasra95short" />
218 <param name="how_to_name" value="no" />
219 <param name="min_ident" value="0" />
220 <param name="max_ident" value="100" />
221 <param name="min_cvrg" value="0" />
222 <param name="unmask" value="yes" />
223 <output name="output1" file="lastz_wrapper_out4.tabular" sort="True" />
224 </test>
225 </tests>
226 <help>
227
228 **What it does**
229
230 **LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_.
231
232 .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00.html
233
234 ------
235
236 **Input formats**
237
238 LASTZ accepts reference and reads in FASTA format. However, because Galaxy supports implicit format conversion the tool will recognize fastq and other method specific formats.
239
240 ------
241
242 **Outputs**
243
244 LASTZ generates one output. Depending on the choice you make in the *Output format* drop-down, LASTZ will produce a SAM file showing sequence alignments, a list of differences between the reads and reference (Polymorphisms), or a general table with one line per alignment block (Tabular). Examples of these outputs are shown below.
245
246 **SAM output**
247
248 SAM has 12 columns::
249
250 1 2 3 4 5 6 7 8 9 10 11 12
251 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
252 HWI-EAS91_1_30788AAXX:1:2:1670:915 99 chr9 58119878 60 36M = 58120234 392 GACCCCTACCCCACCGTGCTCTGGATCTCAGTGTTT IIIIIIIIIIIIIIIIEIIIIIII7IIIIIIIIIII XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:36
253 HWI-EAS91_1_30788AAXX:1:2:1670:915 147 chr9 58120234 60 36M = 58119878 -392 ATGAGTCGAATTCTATTTTCCAAACTGTTAACAAAA IFIIDI;IIICIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:36
254
255
256 where::
257
258 Column Description
259 --------- ---------------------------------------------------------------------
260 1. QNAME Query (pair) NAME
261 2. FLAG bitwise FLAG
262 3. RNAME Reference sequence NAME
263 4. POS 1-based leftmost POSition/coordinate of clipped sequence
264 5. MAPQ MAPping Quality (Phred-scaled)
265 6. CIGAR extended CIGAR string
266 7. MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
267 8. MPOS 1-based Mate POSition
268 9. ISIZE Inferred insert SIZE
269 10. SEQ query SEQuence on the same strand as the reference
270 11. QUAL query QUALity (ASCII-33 gives the Phred base quality)
271 12. OPT variable OPTional fields in the format TAG:VTYPE:VALUE, tab-separated
272
273 The flags are as follows::
274
275 Flag Description
276 ------ -------------------------------------
277 0x0001 the read is paired in sequencing
278 0x0002 the read is mapped in a proper pair
279 0x0004 the query sequence itself is unmapped
280 0x0008 the mate is unmapped
281 0x0010 strand of the query (1 for reverse)
282 0x0020 strand of the mate
283 0x0040 the read is the first read in a pair
284 0x0080 the read is the second read in a pair
285 0x0100 the alignment is not primary
286
287 **Polymorphism (SNP or differences) output**
288
289 Polymorphism output contains 14 columns::
290
291 1 2 3 4 5 6 7 8 9 10 11 12 13 14
292 --------------------------------------------------------------------------------------------------------------------------------------------------------------
293 chrM 2490 2491 + 5386 HWI-EAS91_1_306UPAAXX:6:1:486:822 10 11 - 36 C A ACCTGTTTTACAGACACCTAAAGCTACATCGTCAAC ACCTGTTTTAAAGACACCTAAAGCTACATCGTCAAC
294 chrM 2173 2174 + 5386 HWI-EAS91_1_306UPAAXX:6:1:259:1389 26 27 + 36 G T GCGTACTTATTCGCCACCATGATTATGACCAGTGTT GCGTACTTATTCGCCACCATGATTATTACCAGTGTT
295
296 where::
297
298 1. (chrM) - Reference sequence id
299 2. (2490) - Start position of the difference in the reference
300 3. (2491) - End position of the difference in the reference
301 4. (+) - Strand of the reference (always plus)
302 5. (5386) - Length of the reference sequence
303 6. (HWI...) - read id
304 7. (10) - Start position of the difference in the read
305 8. (11) - End position of the difference in the read
306 9. (-) - Strand of the read
307 10. (36) - Length of the read
308 11. (C) - Nucleotide in the reference
309 12. (A) - Nucleotide in the read
310 13. (ACC...) - Reference side of the alignment
311 14. (ACC...) - Read side of the alignment
312
313 **Tabular output**
314
315 Tabular output is a tab-separated format with 30 columns::
316
317 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
318 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
319 14 PHIX174 + 5386 4648 4647 4661 14 ATTTTCGTGATATT EYKX4VC01BV8HS + 204 154 153 167 154 153 167 14 ATTTTCGTGATATT .............. 14M 14/14 100.0% 14/204 6.9% 0/14 0.0% 4494 NA
320 16 PHIX174 + 5386 3363 3362 3378 16 GACGCCGGATTTGAGA EYKX4VC01AWJ88 - 259 36 35 51 209 208 224 16 GACGCCGGATTTGAGA ................ 16M 16/16 100.0% 16/259 6.2% 0/16 0.0% 3327 NA
321
322 The following columns are present::
323
324 Field Meaning
325 ---------------- -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
326 1. score Score of the alignment block. The scale and meaning of this number will vary, depending on the final stage performed and other command-line options.
327 2. name1 Name of the target sequence.
328 3. strand1 Target sequence strand, either "+" or "−".
329 4. size1 Size of the entire target sequence.
330 5. start1 Starting position of the alignment block in the target, origin-one.
331 6. zstart1 Starting position of the alignment block in the target, origin-zero.
332 7. end1 Ending position of the alignment block in the target, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems).
333 8. length1 Length of the alignment block in the target (excluding gaps).
334 9. text1 Aligned characters in the target, including gap characters.
335 10. name2 Name of the query sequence.
336 11. strand2 Query sequence strand, either "+" or "−".
337 12. size2 Size of the entire query sequence.
338 13. start2 Starting position of the alignment block in the query, origin-one.
339 14. zstart2 Starting position of the alignment block in the query, origin-zero.
340 15. end2 Ending position of the alignment block in the query, expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems).
341 16. start2+ Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-one. Note that if strand2 is "−", then this is the other end of the block from start2.
342 17. zstart2+ Starting position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), origin-zero. Note that if strand2 is "−", then this is the other end of the block from zstart2.
343 18. end2+ Ending position of the alignment block in the query, counting along the query sequence's positive strand (regardless of which query strand was aligned), expressed either as origin-one closed or origin-zero half-open (the ending value is the same in both systems). Note that if strand2 is "−", then this is the other end of the block from end2.
344 19. length2 Length of the alignment block in the query (excluding gaps).
345 20. text2 Aligned characters in the query, including gap characters.
346 21. diff Differences between what would be written for text1 and text2. Matches are written as . (period), transitions as : (colon), transversions as X, and gaps as - (hyphen).
347 22. cigar A CIGAR-like representation of the alignment's path through the Dynamic Programming matrix. This is the short representation, without spaces, described in the Ensembl CIGAR specification.
348 23./24. identity Fraction of aligned bases in the block that are matches (see Identity). This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;. The second field contains the same value, computed as a percentage.
349 25./26. coverage Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (see Coverage). This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;. The second field contains the same value, computed as a percentage.
350 27./28. gaprate Rate of gaps (also called indels) in the alignment block. This is written as two fields. The first field is a fraction, written as &lt;n&gt;/&lt;d&gt;, with the numerator being the number of alignment columns containing gaps and the denominator being the number without gaps. The second field contains the same value, computed as a percentage.
351 29. diagonal The diagonal of the start of the alignment block in the dynamic programming matrix, expressed as an identifying number start1-start2.
352 30. shingle A measurement of the shingle overlap between the target and the query. This is intended for the case where both the target and query are relatively short, and their ends are expected to overlap.
353
354 -------
355
356 **LASTZ Settings**
357
358 There are two setting modes: (1) **Commonly used settings** and (2) **Full Parameter List**.
359
360 **Commonly used settings**
361
362 There are seven modes::
363
364 Illumina-Solexa/AB-SOLiD 95% identity
365 Illumina-Solexa/AB-SOLiD 85% identity
366 Roche-454 98% identity
367 Roche-454 95% identity
368 Roche-454 90% identity
369 Roche-454 85% identity
370 Roche-454 75% identity
371
372 When deciding which one to use, consider the following: a 36 bp read with two differences will be 34/36 = 94% identical to the reference.
373
374 **Full Parameter List**
375
376 This mode gives you fuller control over lastz. The description of these and other parameters is found at the end of this page. Note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu.
377
378 ------
379
380 **Do you want to modify the reference name?**
381
382 This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser.
383
384 ------
385
386 **LASTZ parameter list**
387
388 This is an exhaustive list of LASTZ options. Once again, please note that not all options are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu::
389
390 target[[s..e]][-] spec/file containing target sequence (fasta or nib)
391 [s..e] defines a subrange of the file
392 - indicates reverse-complement
393 (use --help=files for more details)
394 query[[s..e]][-] spec/file containing query sequences (fasta or nib)
395 if absent, queries come from stdin (unless they
396 aren't needed, as for --self or --tableonly)
397 (use --help=files for more details)
398 --self the target sequence is also the query
399 --quantum the query sequence contains quantum DNA
400 --seed=match&lt;length&gt; use a word with no gaps instead of a seed pattern
401 --seed=half&lt;length&gt; use space-free half-weight word instead of seed pattern
402 --match=&lt;reward&gt;[,&lt;penalty&gt;] set the score values for a match (+&lt;reward&gt;)
403 and mismatch (-&lt;penalty&gt;)
404 --[no]trans[ition][=2] allow one or two transitions in a seed hit
405 (by default a transition is allowed)
406 --word=&lt;bits&gt; set max bits for word hash; use this to trade time for
407 memory, eliminating thrashing for heavy seeds
408 (default is 28 bits)
409 --[no]filter=[&lt;T&gt;:]&lt;M&gt; filter half-weight seed hits, requiring at least M
410 matches and allowing no more than T transversions
411 (default is no filtering)
412 --notwins require just one seed hit
413 --twins=[&lt;min&gt;:]&lt;maxgap&gt; require two nearby seed hits on the same diagonal
414 (default is twins aren't required)
415 --notwins allow single, isolated seeds
416 --[no]recoverseeds avoid losing seeds in hash collisions. Cannot be used with --twins
417 --seedqueue=&lt;entries&gt; set number of entries in seed hit queue
418 (default is 262144)
419 --anchors=&lt;file&gt; read anchors from a file, instead of discovering anchors
420 via seeding
421 --recoverhits recover hash-collision seed hits
422 (default is not to recover seed hits)
423 --step=&lt;length&gt; set step length (default is 1)
424 --maxwordcount=&lt;limit&gt; words occurring more often than &lt;limit&gt; in the target
425 are not eligible for seeds
426 --strand=both search both strands
427 --strand=plus search + strand only (matching strand of query spec)
428 --strand=minus search - strand only (opposite strand of query spec)
429 (by default both strands are searched)
430 --ambiguousn treat N as an ambiguous nucleotide
431 (by default N is treated as a sequence splicing character)
432 --[no]gfextend perform gap-free extension of seed hits to HSPs
433 (by default no extension is performed)
434 --[no]chain perform chaining
435 --chain=&lt;diag,anti&gt; perform chaining with given penalties for diagonal and
436 anti-diagonal
437 (by default no chaining is performed)
438 --[no]gapped perform gapped alignment (instead of gap-free)
439 (by default gapped alignment is performed)
440 --score[s]=&lt;file&gt; read substitution scores from a file
441 (default is HOXD70)
442 --unitscore[s] scores are +1/-1 for match/mismatch
443 --gap=&lt;[open,]extend&gt; set gap open and extend penalties (default is 400,30)
444 --xdrop=&lt;score&gt; set x-drop threshold (default is 10*sub[A][A])
445 --ydrop=&lt;score&gt; set y-drop threshold (default is open+300extend)
446 --infer[=&lt;control&gt;] infer scores from the sequences, then use them
447 --inferonly[=&lt;control&gt;] infer scores, but don't use them (requires --infscores)
448 all inference options are read from the control file
449 --infscores[=&lt;file&gt;] write inferred scores to a file
450 --hspthresh=&lt;score&gt; set threshold for high scoring pairs (default is 3000)
451 ungapped extensions scoring lower are discarded
452 &lt;score&gt; can also be a percentage or base count
453 --entropy adjust for entropy when qualifying HSPs in the x-drop extension
454 method
455 --noentropy don't adjust for entropy when qualifying HSPs
456 --exact=&lt;length&gt; set threshold for exact matches
457 if specified, exact matches are found rather than high
458 scoring pairs (replaces --hspthresh)
459 --inner=&lt;score&gt; set threshold for HSPs during interpolation
460 (default is no interpolation)
461 --gappedthresh=&lt;score&gt; set threshold for gapped alignments
462 gapped extensions scoring lower are discarded
463 &lt;score&gt; can also be a percentage or base count
464 (default is to use same value as --hspthresh)
465 --ball=&lt;score&gt; set minimum score required of words 'in' a quantum ball
466 --[no]entropy involve entropy in filtering high scoring pairs
467 (default is "entropy")
468 --[no]mirror report/use mirror image of all gap-free alignments
469 (default is "mirror" for self-alignments only)
470 --traceback=&lt;bytes&gt; space for trace-back information
471 (default is 80.0M)
472 --masking=&lt;count&gt; mask any position in target hit this many times
473 zero indicates no masking
474 (default is no masking)
475 --targetcapsule=&lt;capsule_file&gt; the target seed word position table and seed
476 (as well as the target sequence)are read from specified file
477 --segments=&lt;segment_file&gt; read segments from a file, instead of discovering
478 them via seeding. Replaces other seeding or gap-free extension
479 options
480 --[no]census[=&lt;file&gt;] count/report how many times each target base aligns
481 (default is to not report census)
482 --identity=&lt;min&gt;[..&lt;max&gt;] filter alignments by percent identity
483 0&lt;=min&lt;=max&lt;=100; blocks (or HSPs) outside min..max
484 are discarded
485 (default is no identity filtering)
486 --coverage=&lt;min&gt;[..&lt;max&gt;] filter alignments by percentage of query covered
487 0&lt;=min&lt;=max&lt;=100; blocks (or HSPs) outside min..max
488 are discarded
489 (default is no query coverage filtering)
490 --notrivial do not output trivial self-alignment block if the target and query
491 sequences are identical. Using --self enables this option automatically
492 --output=&lt;output_file&gt; write the alignments to the specified file name instead of stdout
493 --code=&lt;file&gt; give quantum code for query sequence (only for display)
494 --format=&lt;type&gt; specify output format; one of lav, axt, maf, maf+, maf-, text,
495 lav+text, cigar, text, rdotplot, general, or general:&lt;fields&gt;
496 (by default output is LAV)
497 --rdotplot=&lt;file&gt; create an additional output file suitable for plotting the alignments
498 with the R statistical package.
499 --markend Just before normal completion, write "# lastz end-of-file" to output file
500 --census[=&lt;output_file&gt;] count and report how many times each target base aligns, up
501 to 255. Ns are included in the count
502 --census16[=&lt;output_file&gt;] count and report how many times each target base aligns, up
503 to 65 thousand
504 --census32[=&lt;output_file&gt;] count and report how many times each target base aligns, up
505 to 4 billion
506 --writecapsule=&lt;capsule_file&gt; just write out a target capsule file and quit; don't
507 search for seeds or perform subsequent stages
508 --verbosity=&lt;level&gt; set info level (0 is minimum, 10 is everything)
509 (default is 0)
510 --[no]runtime report runtime in the output file
511 (default is to not report runtime)
512 --tableonly[=count] just produce the target position table, don't
513 search for seeds
514 --[no]stats[=&lt;file&gt;] show search statistics (or don't)
515 (not available in this build)
516 --version report the program version and quit
517 --help list all options
518 --help=files list information about file specifiers
519 --help=short[cuts] list blastz-compatible shortcuts
520 --help=yasra list yasra-specific shortcuts
521
522 </help>
523 </tool>