comparison gsnap.xml @ 3:488e9d642566 draft

GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author peterjc
date Wed, 28 Sep 2016 10:47:28 -0400
parents f6ba0f12cca2
children 14561eb803a5
comparison
equal deleted inserted replaced
2:f6ba0f12cca2 3:488e9d642566
1 <tool id="gsnap" name="GSNAP" version="3.0.0"> 1 <tool id="gsnap" name="GSNAP" version="3.0.1">
2 <description>Genomic Short-read Nucleotide Alignment Program</description> 2 <description>Genomic Short-read Nucleotide Alignment Program</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2013-05-09">gmap</requirement> 4 <requirement type="package" version="2013-05-09">gmap</requirement>
5 </requirements> 5 </requirements>
6 <version_string>gsnap --version</version_string> 6 <version_command>gsnap --version</version_command>
7 <command> 7 <command>
8 #import os.path, re 8 #import os.path, re
9 gsnap 9 gsnap
10 --nthreads="4" --ordered 10 --nthreads="4" --ordered
11 #if $refGenomeSource.genomeSource == "gmapdb": 11 #if $refGenomeSource.genomeSource == "gmapdb":
138 #if $output.options == "advanced": 138 #if $output.options == "advanced":
139 #if $output.npath.__str__ != '': 139 #if $output.npath.__str__ != '':
140 --npath=$output.npath 140 --npath=$output.npath
141 #end if 141 #end if
142 #if $output.maxsearch.__str__ != '': 142 #if $output.maxsearch.__str__ != '':
143 --maxsearch=$output.maxsearch 143 --maxsearch=$output.maxsearch
144 #end if 144 #end if
145 $output.quiet_if_excessive 145 $output.quiet_if_excessive
146 $output.show_refdiff 146 $output.show_refdiff
147 $output.clip_overlap 147 $output.clip_overlap
148 #end if 148 #end if
264 <option value="RF">rev-fwd, for circularized inserts</option> 264 <option value="RF">rev-fwd, for circularized inserts</option>
265 <option value="FF">fwd-fwd, same strand</option> 265 <option value="FF">fwd-fwd, same strand</option>
266 </param> 266 </param>
267 <param name="pairmax_dna" type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/> 267 <param name="pairmax_dna" type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
268 <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided. Should probably match the value for localsplicedist."/> 268 <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided. Should probably match the value for localsplicedist."/>
269 <param name="pairexpect" type="integer" value="" optional="true" label="Expected paired-end length" 269 <param name="pairexpect" type="integer" value="" optional="true" label="Expected paired-end length"
270 help="Used for calling splices in medial part of paired-end reads (default 200)"/> 270 help="Used for calling splices in medial part of paired-end reads (default 200)"/>
271 <param name="pairdev" type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length" 271 <param name="pairdev" type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length"
272 help="Used for calling splices in medial part of paired-end reads (default 25)"/> 272 help="Used for calling splices in medial part of paired-end reads (default 25)"/>
273 </when> 273 </when>
274 </conditional> 274 </conditional>
275 <param name="barcode_length" type="integer" value="" optional="true" label="Amount of barcode to remove from start of read (default 0)" /> 275 <param name="barcode_length" type="integer" value="" optional="true" label="Amount of barcode to remove from start of read (default 0)" />
276 <param name="fastq_id_start" type="integer" value="" optional="true" label="Starting field of identifier in FASTQ header, whitespace-delimited, starting from 1" /> 276 <param name="fastq_id_start" type="integer" value="" optional="true" label="Starting field of identifier in FASTQ header, whitespace-delimited, starting from 1" />
277 <param name="fastq_id_end" type="integer" value="" optional="true" label="Ending field of identifier in FASTQ header, whitespace-delimited, starting from 1" 277 <param name="fastq_id_end" type="integer" value="" optional="true" label="Ending field of identifier in FASTQ header, whitespace-delimited, starting from 1"
278 help="Examples: 278 help="Examples:
279 &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1 279 &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1
280 &lt;br&gt; . start=1, end=1 (default) => identifier is HWUSI-EAS100R:6:73:941:1973#0/1 280 &lt;br&gt; . start=1, end=1 (default) => identifier is HWUSI-EAS100R:6:73:941:1973#0/1
281 &lt;br&gt;@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36 281 &lt;br&gt;@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
282 &lt;br&gt; . start=1, end=1 => identifier is SRR001666.1 282 &lt;br&gt; . start=1, end=1 => identifier is SRR001666.1
283 &lt;br&gt; . start=2, end=2 => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345 283 &lt;br&gt; . start=2, end=2 => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
284 &lt;br&gt; . start=1, end=2 => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345" 284 &lt;br&gt; . start=1, end=2 => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345"
285 /> 285 />
286 <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program" 286 <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program"
287 help="String after the accession having a 'Y' after the first colon, like this: 287 help="String after the accession having a 'Y' after the first colon, like this:
288 &lt;br&gt;@accession 1:Y:0:CTTGTA 288 &lt;br&gt;@accession 1:Y:0:CTTGTA
289 &lt;br&gt;where the 'Y' signifies filtering by chastity. 289 &lt;br&gt;where the 'Y' signifies filtering by chastity.
290 &lt;br&gt; For 'either', a 'Y' on either end of a paired-end read will be filtered. 290 &lt;br&gt; For 'either', a 'Y' on either end of a paired-end read will be filtered.
291 &lt;br&gt; For 'both', a 'Y' is required on both ends of a paired-end read (or on the only end of a single-end read)" 291 &lt;br&gt; For 'both', a 'Y' is required on both ends of a paired-end read (or on the only end of a single-end read)"
292 > 292 >
301 --> 301 -->
302 <when value="gsnap_fasta"> 302 <when value="gsnap_fasta">
303 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/> 303 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
304 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/> 304 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
305 </when> 305 </when>
306 306
307 </conditional> 307 </conditional>
308 <!-- No longer in options as of version 2011-11-30 308 <!-- No longer in options as of version 2011-11-30
309 <param name="mapq_unique_score" type="integer" value="" optional="true" label="MAPQ score threshold" 309 <param name="mapq_unique_score" type="integer" value="" optional="true" label="MAPQ score threshold"
310 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this 310 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
311 (if not selected, then reports all multiple results, up to npaths)" /> 311 (if not selected, then reports all multiple results, up to npaths)" />
312 --> 312 -->
313 313
314 <!-- GMAPDB for alignment --> 314 <!-- GMAPDB for alignment -->
348 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option> 348 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
349 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option> 349 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
350 </param> 350 </param>
351 351
352 <conditional name="use_splicing"> 352 <conditional name="use_splicing">
353 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns" 353 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
354 help="Look for splicing involving known sites or known introns at short or long distances. 354 help="Look for splicing involving known sites or known introns at short or long distances.
355 See README instructions for the distinction between known sites and known introns"> 355 See README instructions for the distinction between known sites and known introns">
356 <option value="none" selected="true">None</option> 356 <option value="none" selected="true">None</option>
357 <option value="gmapdb">From the GMAP Database</option> 357 <option value="gmapdb">From the GMAP Database</option>
358 <option value="history">A Map in your history</option> 358 <option value="history">A Map in your history</option>
359 </param> 359 </param>
360 <when value="none"/> 360 <when value="none"/>
361 <when value="history"> 361 <when value="history">
362 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 362 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" label="Select a splicesite map"
363 help="built with GMAP IIT"/> 363 help="built with GMAP IIT"/>
364 </when> 364 </when>
365 <when value="gmapdb"> 365 <when value="gmapdb">
366 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help=""> 366 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
367 <options from_file="gmap_indices.loc"> 367 <options from_file="gmap_indices.loc">
382 <option value="gmapdb">From the GMAP Database</option> 382 <option value="gmapdb">From the GMAP Database</option>
383 <option value="history">A SNP Index in your history</option> 383 <option value="history">A SNP Index in your history</option>
384 </param> 384 </param>
385 <when value="none"/> 385 <when value="none"/>
386 <when value="history"> 386 <when value="history">
387 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 387 <param name="snpindex" type="data" format="gmapsnpindex" label="Select a snpindex"
388 help="built with GMAP SNP Index"/> 388 help="built with GMAP SNP Index"/>
389 </when> 389 </when>
390 <when value="gmapdb"> 390 <when value="gmapdb">
391 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help=""> 391 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
392 <options from_file="gmap_indices.loc"> 392 <options from_file="gmap_indices.loc">
401 </when> 401 </when>
402 </conditional> 402 </conditional>
403 403
404 </when> 404 </when>
405 <when value="gmapdb"> 405 <when value="gmapdb">
406 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 406 <param name="gmapdb" type="data" format="gmapdb" label="Select a gmapdb"
407 help="A GMAP database built with GMAP Build"/> 407 help="A GMAP database built with GMAP Build"/>
408 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size"> 408 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
409 <options> 409 <options>
410 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/> 410 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
411 </options> 411 </options>
418 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option> 418 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
419 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option> 419 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
420 </param> 420 </param>
421 421
422 <conditional name="use_splicing"> 422 <conditional name="use_splicing">
423 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns" 423 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
424 help="Look for splicing involving known sites or known introns at short or long distances. 424 help="Look for splicing involving known sites or known introns at short or long distances.
425 See README instructions for the distinction between known sites and known introns"> 425 See README instructions for the distinction between known sites and known introns">
426 <option value="none" selected="true">None</option> 426 <option value="none" selected="true">None</option>
427 <option value="gmapdb">From the GMAP Database</option> 427 <option value="gmapdb">From the GMAP Database</option>
428 <option value="history">A Map in your history</option> 428 <option value="history">A Map in your history</option>
429 </param> 429 </param>
430 <when value="none"/> 430 <when value="none"/>
431 <when value="history"> 431 <when value="history">
432 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 432 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" label="Select a splicesite map"
433 help="built with GMAP IIT"/> 433 help="built with GMAP IIT"/>
434 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites" 434 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
435 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron. 435 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
436 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/> 436 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
437 </when> 437 </when>
438 <when value="gmapdb"> 438 <when value="gmapdb">
439 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help=""> 439 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
440 <options> 440 <options>
441 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/> 441 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
442 </options> 442 </options>
443 </param> 443 </param>
444 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites" 444 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
445 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron. 445 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
446 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/> 446 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
447 </when> 447 </when>
448 </conditional> 448 </conditional>
449 449
450 <conditional name="use_snps"> 450 <conditional name="use_snps">
453 <option value="gmapdb">From the GMAP Database</option> 453 <option value="gmapdb">From the GMAP Database</option>
454 <option value="history">A SNP Index in your history</option> 454 <option value="history">A SNP Index in your history</option>
455 </param> 455 </param>
456 <when value="none"/> 456 <when value="none"/>
457 <when value="history"> 457 <when value="history">
458 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 458 <param name="snpindex" type="data" format="gmapsnpindex" label="Select a snpindex"
459 help="built with GMAP SNP Index"/> 459 help="built with GMAP SNP Index"/>
460 </when> 460 </when>
461 <when value="gmapdb"> 461 <when value="gmapdb">
462 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help=""> 462 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help="">
463 <options> 463 <options>
476 <option value="default">Use default settings</option> 476 <option value="default">Use default settings</option>
477 <option value="advanced">Set Computation Options</option> 477 <option value="advanced">Set Computation Options</option>
478 </param> 478 </param>
479 <when value="default"/> 479 <when value="default"/>
480 <when value="advanced"> 480 <when value="advanced">
481 <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)" 481 <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)"
482 help="Maximum number of mismatches allowed (if not specified, then 482 help="Maximum number of mismatches allowed (if not specified, then
483 defaults to the ultrafast level of ((readlength+index_interval-1)/kmer - 2)) 483 defaults to the ultrafast level of ((readlength+index_interval-1)/kmer - 2))
484 (By default, the genome index interval is 3, but this can be changed 484 (By default, the genome index interval is 3, but this can be changed
485 by providing a different value for -q to gmap_build when processing the genome.) 485 by providing a different value for -q to gmap_build when processing the genome.)
486 If specified between 0.0 and 1.0, then treated as a fraction 486 If specified between 0.0 and 1.0, then treated as a fraction
495 <param name="maxsearch" type="integer" value="" optional="true" label="Maximum number of alignments to find (default 1000)" 495 <param name="maxsearch" type="integer" value="" optional="true" label="Maximum number of alignments to find (default 1000)"
496 help="Must be larger than paths, which is the number to report. 496 help="Must be larger than paths, which is the number to report.
497 Keeping this number large will allow for random selection among multiple alignments. 497 Keeping this number large will allow for random selection among multiple alignments.
498 Reducing this number can speed up the program. "/> 498 Reducing this number can speed up the program. "/>
499 499
500 <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment" 500 <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment"
501 help="Threshold for searching for a terminal alignment (from one end of the 501 help="Threshold for searching for a terminal alignment (from one end of the
502 read to the best possible position at the other end) (default 2 502 read to the best possible position at the other end) (default 2
503 for standard, atoi-stranded, and atoi-nonstranded mode; default 100 503 for standard, atoi-stranded, and atoi-nonstranded mode; default 100
504 for cmet-stranded and cmet-nonstranded mode). 504 for cmet-stranded and cmet-nonstranded mode).
505 For example, if this value is 2, then if GSNAP finds an exact or 505 For example, if this value is 2, then if GSNAP finds an exact or
508 obtain terminal alignments for very short reads, although such reads 508 obtain terminal alignments for very short reads, although such reads
509 probably don't have enough specificity for terminal alignments anyway. 509 probably don't have enough specificity for terminal alignments anyway.
510 To turn off terminal alignments, set this to a high value, greater 510 To turn off terminal alignments, set this to a high value, greater
511 than the value for max-mismatches. 511 than the value for max-mismatches.
512 "/> 512 "/>
513 <param name="indel_penalty" type="integer" value="" optional="true" label="Penalty for an indel (default 2)" 513 <param name="indel_penalty" type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
514 help="Counts against mismatches allowed. To find indels, make indel-penalty less than or equal to max-mismatches. A value &lt; 2 can lead to false positives at read ends" /> 514 help="Counts against mismatches allowed. To find indels, make indel-penalty less than or equal to max-mismatches. A value &lt; 2 can lead to false positives at read ends" />
515 <param name="indel_endlength" type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" /> 515 <param name="indel_endlength" type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
516 <param name="max_middle_insertions" type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" /> 516 <param name="max_middle_insertions" type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" />
517 <param name="max_middle_deletions" type="integer" value="" optional="true" label="Maximum number of middle deletions allowed (default 30)" /> 517 <param name="max_middle_deletions" type="integer" value="" optional="true" label="Maximum number of middle deletions allowed (default 30)" />
518 <param name="max_end_insertions" type="integer" value="" optional="true" label="Maximum number of end insertions allowed (default 3)" /> 518 <param name="max_end_insertions" type="integer" value="" optional="true" label="Maximum number of end insertions allowed (default 3)" />
519 <param name="max_end_deletions" type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" /> 519 <param name="max_end_deletions" type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" />
520 <param name="suboptimal_levels" type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)" 520 <param name="suboptimal_levels" type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
521 help="All hits with best score plus suboptimal-levels are reported" /> 521 help="All hits with best score plus suboptimal-levels are reported" />
522 <param name="adapter_strip" type="select" label="Method for removing adapters from reads" 522 <param name="adapter_strip" type="select" label="Method for removing adapters from reads"
523 help="Default is 'off'. To turn on, specify 'paired', which removes adapters 523 help="Default is 'off'. To turn on, specify 'paired', which removes adapters
524 from paired-end reads if they appear to be present."> 524 from paired-end reads if they appear to be present.">
525 <option value="paired">paired</option> 525 <option value="paired">paired</option>
526 <option value="off" selected="true">off</option> 526 <option value="off" selected="true">off</option>
527 </param> 527 </param>
528 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 528 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
529 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/> 529 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
530 <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)" 530 <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)"
531 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/> 531 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/>
532 <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results" 532 <param name="use_tally" type="data" format="tally.iit" optional="true" label="Select a tally IIT file to resolve concordant multiple results"
533 help="generated by gsnap_tally and iit_store"/> 533 help="generated by gsnap_tally and iit_store"/>
534 534
535 <!-- 535 <!--
536 tallydir=STRING Directory for tally IIT file to resolve concordant multiple results (default is 536 tallydir=STRING Directory for tally IIT file to resolve concordant multiple results (default is
537 location of genome index files specified using -D and -d). Note: can 537 location of genome index files specified using -D and -d). Note: can
540 runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple results (default is 540 runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple results (default is
541 location of genome index files specified using -D and -d). Note: can 541 location of genome index files specified using -D and -d). Note: can
542 just give full path name to use-runlength instead. 542 just give full path name to use-runlength instead.
543 use-runlength=STRING Use this runlength IIT file to resolve concordant multiple results 543 use-runlength=STRING Use this runlength IIT file to resolve concordant multiple results
544 --> 544 -->
545 545
546 <!-- Options for GMAP alignment within GSNAP --> 546 <!-- Options for GMAP alignment within GSNAP -->
547 <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels" 547 <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels"
548 help="Default: pairsearch,terminal,improve"> 548 help="Default: pairsearch,terminal,improve">
549 <option value="pairsearch" selected="true">pairsearch</option> 549 <option value="pairsearch" selected="true">pairsearch</option>
550 <option value="indel_knownsplice" selected="true">indel_knownsplice</option> 550 <option value="indel_knownsplice" selected="true">indel_knownsplice</option>
551 <option value="terminal" selected="true">terminal</option> 551 <option value="terminal" selected="true">terminal</option>
552 <option value="improve" selected="true">improve</option> 552 <option value="improve" selected="true">improve</option>
553 </param> 553 </param>
554 <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)" 554 <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)"
555 help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" /> 555 help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" />
556 <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)" 556 <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)"
557 help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." /> 557 help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." />
558 <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)" 558 <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)"
559 help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." /> 559 help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." />
560 <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)" 560 <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)"
561 help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." /> 561 help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." />
562 <param name="microexon_spliceprob" type="float" value="" optional="true" label="GMAP microexons threshold (default .90)" 562 <param name="microexon_spliceprob" type="float" value="" optional="true" label="GMAP microexons threshold (default .90)"
563 help="Allow microexons only if one of the splice site probabilities is greater than this value." > 563 help="Allow microexons only if one of the splice site probabilities is greater than this value." >
564 <validator type="in_range" message="The microexons probability must be between 0. and 1." min="0." max="1."/> 564 <validator type="in_range" message="The microexons probability must be between 0. and 1." min="0." max="1."/>
565 </param> 565 </param>
566 </when> 566 </when>
567 </conditional> 567 </conditional>
572 <option value="advanced">Set Splicing Options</option> 572 <option value="advanced">Set Splicing Options</option>
573 </param> 573 </param>
574 <when value="default"/> 574 <when value="default"/>
575 <when value="advanced"> 575 <when value="advanced">
576 <!-- Splicing options for RNA-Seq --> 576 <!-- Splicing options for RNA-Seq -->
577 <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing --> 577 <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing -->
578 <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) --> 578 <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
579 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/> 579 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
580 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/> 580 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
581 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/> 581 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/>
582 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed" 582 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed"
585 <param name="distant_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for distant spliced alignments" 585 <param name="distant_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for distant spliced alignments"
586 help="(default 16, min is the kmer length)"/> 586 help="(default 16, min is the kmer length)"/>
587 <param name="shortend_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments" 587 <param name="shortend_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments"
588 help="(default 2, but unless known splice sites are provided, GSNAP may still need the end length to be the value of kmer size to find a given splice)"/> 588 help="(default 2, but unless known splice sites are provided, GSNAP may still need the end length to be the value of kmer size to find a given splice)"/>
589 <param name="distant_splice_identity" type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/> 589 <param name="distant_splice_identity" type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/>
590 <param name="antistranded_penalty" type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols" 590 <param name="antistranded_penalty" type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols"
591 help="A positive value, such as 1, expects antisense on the first read and sense on the second read. 591 help="A positive value, such as 1, expects antisense on the first read and sense on the second read.
592 Default is 0, which treats sense and antisense equally well"/> 592 Default is 0, which treats sense and antisense equally well"/>
593 </when> 593 </when>
594 </conditional> 594 </conditional>
595 595
596 <!-- Output data --> 596 <!-- Output data -->
600 <option value="advanced">Set Output Options</option> 600 <option value="advanced">Set Output Options</option>
601 </param> 601 </param>
602 <when value="default"/> 602 <when value="default"/>
603 <when value="advanced"> 603 <when value="advanced">
604 <param name="npath" type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/> 604 <param name="npath" type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/>
605 <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive" 605 <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive"
606 help="If more than maximum number of paths are found, then nothing is printed."/> 606 help="If more than maximum number of paths are found, then nothing is printed."/>
607 <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment" 607 <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment"
608 help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/> 608 help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/>
609 <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap" 609 <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap"
610 help="For paired-end reads whose alignments overlap, clip the overlapping region."/> 610 help="For paired-end reads whose alignments overlap, clip the overlapping region."/>
611 </when> 611 </when>
612 </conditional> 612 </conditional>
613 <conditional name="result"> 613 <conditional name="result">
614 <param name="format" type="select" label="Select the output format" help=""> 614 <param name="format" type="select" label="Select the output format" help="">
638 --> 638 -->
639 </conditional> 639 </conditional>
640 <!-- TODO combine fails and split_output --> 640 <!-- TODO combine fails and split_output -->
641 641
642 <conditional name="results"> 642 <conditional name="results">
643 <param name="split_output" type="select" label="&lt;HR&gt;Split outputs" 643 <param name="split_output" type="select" label="&lt;HR&gt;Split outputs"
644 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"> 644 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results">
645 <option value="no">no</option> 645 <option value="no">no</option>
646 <option value="yes">yes</option> 646 <option value="yes">yes</option>
647 </param> 647 </param>
648 <when value="no"> 648 <when value="no">
649 <conditional name="fails"> 649 <conditional name="fails">
653 <option value="failsonly">failsonly - only output failing results</option> 653 <option value="failsonly">failsonly - only output failing results</option>
654 </param> 654 </param>
655 <when value="default"/> 655 <when value="default"/>
656 <when value="nofails"/> 656 <when value="nofails"/>
657 <when value="failsonly"> 657 <when value="failsonly">
658 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format" 658 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
659 help=""/> 659 help=""/>
660 </when> 660 </when>
661 </conditional> 661 </conditional>
662 </when> 662 </when>
663 <when value="yes"> 663 <when value="yes">
664 <conditional name="fails"> 664 <conditional name="fails">
669 </param> 669 </param>
670 <when value="default"/> 670 <when value="default"/>
671 <when value="nofails"/> 671 <when value="nofails"/>
672 <when value="failsonly"/> 672 <when value="failsonly"/>
673 </conditional> 673 </conditional>
674 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format" 674 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
675 help=""/> 675 help=""/>
676 </when> 676 </when>
677 </conditional> 677 </conditional>
678 678
679 </inputs> 679 </inputs>
680 <outputs> 680 <outputs>
806 </data> 806 </data>
807 --> 807 -->
808 808
809 </outputs> 809 </outputs>
810 <tests> 810 <tests>
811 </tests> 811 </tests>
812 812
813 <help> 813 <help>
814 814
815 **What it does** 815 **What it does**
816 816
817 GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc. 817 GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.
818 Publication_ citation: Thomas D. Wu, Serban Nacu "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10. 818 Publication_ citation: Thomas D. Wu, Serban Nacu "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
819 819
820 .. _GSNAP: http://research-pub.gene.com/gmap/ 820 .. _GSNAP: http://research-pub.gene.com/gmap/
821 .. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873 821 .. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
822 http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed 822 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
823 823
824 ------ 824 ------
825 825
826 **Know what you are doing** 826 **Know what you are doing**
827 827
833 833
834 ------ 834 ------
835 835
836 **Input formats** 836 **Input formats**
837 837
838 Input to GSNAP should be in either FASTQ or FASTA format. 838 Input to GSNAP should be in either FASTQ or FASTA format.
839 839
840 The FASTQ input may include quality scores, which will then be included in SAM 840 The FASTQ input may include quality scores, which will then be included in SAM
841 output, if that output format is selected. 841 output, if that output format is selected.
842 842
843 For FASTA format, you should include one line per read (or end of a 843 For FASTA format, you should include one line per read (or end of a
844 paired-end read). The same FASTA file can have a mixture of 844 paired-end read). The same FASTA file can have a mixture of
845 single-end and paired-end reads of varying lengths, if desired. 845 single-end and paired-end reads of varying lengths, if desired.
846 846
878 878
879 SAM output format 879 SAM output format
880 880
881 Default GSNAP format 881 Default GSNAP format
882 See the README_ 882 See the README_
883
884
885
886
887 </help> 883 </help>
884 <citations>
885 <citation type="doi">10.1093/bioinformatics/btq057</citation>
886 </citations>
888 </tool> 887 </tool>
889 888