comparison gmap.xml @ 3:488e9d642566 draft

GMAP wrappers v3.0.1 after linting and cleanup, still untested work-in-progress
author peterjc
date Wed, 28 Sep 2016 10:47:28 -0400
parents f6ba0f12cca2
children 14561eb803a5
comparison
equal deleted inserted replaced
2:f6ba0f12cca2 3:488e9d642566
1 <tool id="gmap" name="GMAP" version="3.0.0"> 1 <tool id="gmap" name="GMAP" version="3.0.1">
2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description> 2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2013-05-09">gmap</requirement> 4 <requirement type="package" version="2013-05-09">gmap</requirement>
5 </requirements> 5 </requirements>
6 <version_string>gmap --version</version_string> 6 <version_command>gmap --version</version_command>
7 <command> 7 <command>
8 #import os,os.path 8 #import os,os.path
9 gmap 9 gmap
10 --nthreads=4 --ordered 10 --nthreads=4 --ordered
11 #if $refGenomeSource.genomeSource == "history": 11 #if $refGenomeSource.genomeSource == "history":
39 --protein_dna 39 --protein_dna
40 #elif $result.format == "protein_gen": 40 #elif $result.format == "protein_gen":
41 --protein_gen 41 --protein_gen
42 #elif $result.format == "sam": 42 #elif $result.format == "sam":
43 --format=$result.sam_paired_read 43 --format=$result.sam_paired_read
44 $result.no_sam_headers 44 $result.no_sam_headers
45 $result.sam_use_0M 45 $result.sam_use_0M
46 $result.force_xs_dir 46 $result.force_xs_dir
47 $result.md_lowercase_snp 47 $result.md_lowercase_snp
48 #* Removed in gmap version 2011-11-30 48 #* Removed in gmap version 2011-11-30
49 #if len($result.noncanonical_splices.__str__) > 0 49 #if len($result.noncanonical_splices.__str__) > 0
125 $input 125 $input
126 #for $i in $inputs: 126 #for $i in $inputs:
127 ${i.added_input} 127 ${i.added_input}
128 #end for 128 #end for
129 #if $split_output == True 129 #if $split_output == True
130 2> $gmap_stderr 130 2> $gmap_stderr
131 #else 131 #else
132 2> $gmap_stderr > $output 132 2> $gmap_stderr > $output
133 #end if 133 #end if
134 </command> 134 </command>
135 <inputs> 135 <inputs>
192 <filter type="sort_by" column="4"/> 192 <filter type="sort_by" column="4"/>
193 </options> 193 </options>
194 </param> 194 </param>
195 </when> 195 </when>
196 <when value="gmapdb"> 196 <when value="gmapdb">
197 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 197 <param name="gmapdb" type="data" format="gmapdb" label="Select a gmapdb"
198 help="A GMAP database built with GMAP Build"/> 198 help="A GMAP database built with GMAP Build"/>
199 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size"> 199 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
200 <options> 200 <options>
201 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/> 201 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
202 </options> 202 </options>
206 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/> 206 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
207 </options> 207 </options>
208 </param> 208 </param>
209 </when> 209 </when>
210 <when value="history"> 210 <when value="history">
211 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" 211 <param name="ownFile" type="data" format="fasta" label="Select the reference genome"
212 help="Fasta containing genomic DNA sequence"/> 212 help="Fasta containing genomic DNA sequence"/>
213 </when> 213 </when>
214 </conditional> 214 </conditional>
215 215
216 216
217 <!-- Computation options --> 217 <!-- Computation options -->
218 <conditional name="computation"> 218 <conditional name="computation">
219 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help=""> 219 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
220 <option value="default">Use default settings</option> 220 <option value="default">Use default settings</option>
221 <option value="advanced">Set Computation Options</option> 221 <option value="advanced">Set Computation Options</option>
222 </param> 222 </param>
223 <when value="default"/> 223 <when value="default"/>
224 <when value="advanced"> 224 <when value="advanced">
225 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/> 225 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
226 <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." > 226 <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." >
227 <validator type="in_range" message="min_intronlength must be positive" min="0" /> 227 <validator type="in_range" message="min_intronlength must be positive" min="0" />
228 </param> 228 </param>
229 <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" > 229 <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" >
230 <validator type="in_range" message="intronlength must be positive" min="0" /> 230 <validator type="in_range" message="intronlength must be positive" min="0" />
231 </param> 231 </param>
232 <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" > 232 <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" >
233 <validator type="in_range" message="localsplicedist must be positive" min="0" /> 233 <validator type="in_range" message="localsplicedist must be positive" min="0" />
234 </param> 234 </param>
235 <param name="totallength" type="integer" value="" optional="true" label="Max total intron length (default 2400000)" > 235 <param name="totallength" type="integer" value="" optional="true" label="Max total intron length (default 2400000)" >
236 <validator type="in_range" message="totallength must be positive" min="0" /> 236 <validator type="in_range" message="totallength must be positive" min="0" />
237 </param> 237 </param>
238 <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera" 238 <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera"
239 help="Default is 40. To turn off, set to 0." > 239 help="Default is 40. To turn off, set to 0." >
240 <validator type="in_range" message="chimera_margin must not be negative" min="0" /> 240 <validator type="in_range" message="chimera_margin must not be negative" min="0" />
241 </param> 241 </param>
242 <param name="direction" type="select" label="cDNA direction"> 242 <param name="direction" type="select" label="cDNA direction">
243 <option value="auto">auto</option> 243 <option value="auto">auto</option>
244 <option value="sense_force">sense_force</option> 244 <option value="sense_force">sense_force</option>
245 <option value="antisense_force">antisense_force</option> 245 <option value="antisense_force">antisense_force</option>
246 <option value="sense_filter">sense_filter</option> 246 <option value="sense_filter">sense_filter</option>
247 <option value="antisense_filter">antisense_filter</option> 247 <option value="antisense_filter">antisense_filter</option>
248 </param> 248 </param>
249 <param name="trimendexons" type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" > 249 <param name="trimendexons" type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >
250 <validator type="in_range" message="trimendexons must be positive" min="1" /> 250 <validator type="in_range" message="trimendexons must be positive" min="1" />
251 </param> 251 </param>
252 <param name="find_shifted_canonical" type="boolean" truevalue="--find-shifted-canonical-species" falsevalue="" checked="false" label="find-shifted-canonical Use a more sensitive search for canonical splicing" help=""/> 252 <param name="find_shifted_canonical" type="boolean" truevalue="--find-shifted-canonical-species" falsevalue="" checked="false" label="find-shifted-canonical Use a more sensitive search for canonical splicing" help=""/>
253 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/> 253 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
254 254
255 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns"> 255 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns">
256 <option value="1">high reward (default)</option> 256 <option value="1">high reward (default)</option>
257 <option value="0">low reward</option> 257 <option value="0">low reward</option>
258 <option value="2">low reward for high-identity sequences</option> 258 <option value="2">low reward for high-identity sequences</option>
259 </param> 259 </param>
260 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other"> 260 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other">
261 <option value="1" selected="true">yes (default)</option> 261 <option value="1" selected="true">yes (default)</option>
262 <option value="0">no</option> 262 <option value="0">no</option>
263 <option value="2">only for high-quality alignments</option> 263 <option value="2">only for high-quality alignments</option>
264 </param> 264 </param>
265 <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probability threshold" 265 <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probability threshold"
266 help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" > 266 help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
267 <validator type="in_range" message="splice probability must be between 0.00 and 1.00" min="0" max="1"/> 267 <validator type="in_range" message="splice probability must be between 0.00 and 1.00" min="0" max="1"/>
268 </param> 268 </param>
269 <param name="prunelevel" type="select" label="Pruning level"> 269 <param name="prunelevel" type="select" label="Pruning level">
270 <option value="0">no pruning (default)</option> 270 <option value="0">no pruning (default)</option>
271 <option value="1">poor sequences</option> 271 <option value="1">poor sequences</option>
272 <option value="2">repetitive sequences</option> 272 <option value="2">repetitive sequences</option>
273 <option value="3">poor and repetitive sequences</option> 273 <option value="3">poor and repetitive sequences</option>
274 </param> 274 </param>
275 <!-- could do this as a config file 275 <!-- could do this as a config file
276 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" /> 276 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
277 <param name="chrsubset" type="text" label="Chromosome subset to search" /> 277 <param name="chrsubset" type="text" label="Chromosome subset to search" />
278 --> 278 -->
279 </when> 279 </when>
280 </conditional> 280 </conditional>
291 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help=""> 291 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help="">
292 <option value="">Don't invert the cDNA (default)</option> 292 <option value="">Don't invert the cDNA (default)</option>
293 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option> 293 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
294 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option> 294 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
295 </param> 295 </param>
296 <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)"> 296 <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)">
297 <validator type="in_range" message="introngap must be positive" min="0" /> 297 <validator type="in_range" message="introngap must be positive" min="0" />
298 </param> 298 </param>
299 <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)"> 299 <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)">
300 <validator type="in_range" message="wraplength must be positive" min="1" /> 300 <validator type="in_range" message="wraplength must be positive" min="1" />
301 </param> 301 </param>
302 <param name="npaths" type="integer" value="" optional="true" 302 <param name="npaths" type="integer" value="" optional="true"
303 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." > 303 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." >
304 <validator type="in_range" message="npaths must not be negative" min="0" /> 304 <validator type="in_range" message="npaths must not be negative" min="0" />
305 </param> 305 </param>
306 <param name="suboptimal_score" type="integer" value="" optional="true" 306 <param name="suboptimal_score" type="integer" value="" optional="true"
307 label="Report only paths whose score is within this value of the best path" 307 label="Report only paths whose score is within this value of the best path"
308 help="By default the program prints all paths found." > 308 help="By default the program prints all paths found." >
309 <validator type="in_range" message="suboptimal_score must be positive" min="0" /> 309 <validator type="in_range" message="suboptimal_score must be positive" min="0" />
310 </param> 310 </param>
311 <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" > 311 <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" >
312 <validator type="in_range" message="chimera_overlap must be positive" min="0" /> 312 <validator type="in_range" message="chimera_overlap must be positive" min="0" />
313 </param> 313 </param>
314 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue="" 314 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
315 label="Translates cDNA with corrections for frameshifts"/> 315 label="Translates cDNA with corrections for frameshifts"/>
316 <param name="protein" type="select" label="Protein alignment" help=""> 316 <param name="protein" type="select" label="Protein alignment" help="">
317 <option value="">default</option> 317 <option value="">default</option>
318 <option value="--fulllength=true">Assume full-length protein, starting with Met</option> 318 <option value="--fulllength=true">Assume full-length protein, starting with Met</option>
319 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option> 319 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option>
381 <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/> 381 <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/>
382 <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/> 382 <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/>
383 <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/> 383 <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
384 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/> 384 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
385 <param name="sam_use_0M" type="boolean" truevalue="--sam-use-0M" falsevalue="" checked="false" label="Insert 0M in CIGAR between adjacent insertions and deletions" help="Required by Picard, but can cause errors in other tools"/> 385 <param name="sam_use_0M" type="boolean" truevalue="--sam-use-0M" falsevalue="" checked="false" label="Insert 0M in CIGAR between adjacent insertions and deletions" help="Required by Picard, but can cause errors in other tools"/>
386 <param name="force_xs_dir" type="boolean" truevalue="--force-xs-dir" falsevalue="" checked="false" label="Force direction (disallow XS:A:?)" 386 <param name="force_xs_dir" type="boolean" truevalue="--force-xs-dir" falsevalue="" checked="false" label="Force direction (disallow XS:A:?)"
387 help="For RNA-Seq alignments, disallows XS:A:? when the sense direction is unclear, and replaces this value arbitrarily with XS:A:+. May be useful for some programs, such as Cufflinks, that cannot handle XS:A:?. However, if you use this flag, the reported value of XS:A:+ in these cases will not be meaningful."/> 387 help="For RNA-Seq alignments, disallows XS:A:? when the sense direction is unclear, and replaces this value arbitrarily with XS:A:+. May be useful for some programs, such as Cufflinks, that cannot handle XS:A:?. However, if you use this flag, the reported value of XS:A:+ in these cases will not be meaningful."/>
388 <param name="md_lowercase_snp" type="boolean" truevalue="--md-lowercase-snp" falsevalue="" checked="false" label="MD lowercase SNP" 388 <param name="md_lowercase_snp" type="boolean" truevalue="--md-lowercase-snp" falsevalue="" checked="false" label="MD lowercase SNP"
389 help="In MD string, when known SNPs are given by the -v flag, prints difference nucleotides as lower-case when they differ from reference but match a known alternate allele"/> 389 help="In MD string, when known SNPs are given by the -v flag, prints difference nucleotides as lower-case when they differ from reference but match a known alternate allele"/>
390 </when> 390 </when>
391 </conditional> <!-- name="result" --> 391 </conditional> <!-- name="result" -->
392 392
393 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/> 393 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/>
394 394
395 395
396 <!-- 396 <!--
397 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files. 397 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files.
398 mapexons Map each exon separately 398 mapexons Map each exon separately
399 mapboth Report hits from both strands of genome 399 mapboth Report hits from both strands of genome
400 flanking=INT Show flanking hits (default 0) 400 flanking=INT Show flanking hits (default 0)
401 print-comment Show comment line for each hit 401 print-comment Show comment line for each hit
402 --> 402 -->
403 403
404 <!-- 404 <!--
405 min-trimmed-coverage=FLOAT Do not print alignments with trimmed coverage less 405 min-trimmed-coverage=FLOAT Do not print alignments with trimmed coverage less
406 this value (default=0.0, which means no filtering) 406 this value (default=0.0, which means no filtering)
407 Note that chimeric alignments will be output regardless 407 Note that chimeric alignments will be output regardless
408 of this filter 408 of this filter
409 min-identity=FLOAT Do not print alignments with identity less 409 min-identity=FLOAT Do not print alignments with identity less
482 <when input="result['format']" value="map_exons" format="gmap_annotation"/> 482 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
483 </change_format> 483 </change_format>
484 </data> 484 </data>
485 </outputs> 485 </outputs>
486 <tests> 486 <tests>
487 </tests> 487 </tests>
488 488
489 <help> 489 <help>
490 490
491 **What it does** 491 **What it does**
492 492
493 GMAP_ (Genomic Mapping and Alignment Program) The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains. It is developed by Thomas D. Wu of Genentech, Inc. 
493 GMAP_ (Genomic Mapping and Alignment Program) The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains. It is developed by Thomas D. Wu of Genentech, Inc.
494 494
495 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310 495 Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
496 496
497 .. _GMAP: http://research-pub.gene.com/gmap/ 497 .. _GMAP: http://research-pub.gene.com/gmap/
498 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859 498 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
504 .. class:: warningmark 504 .. class:: warningmark
505 505
506 You will want to read the README_ 506 You will want to read the README_
507 507
508 .. _README: http://research-pub.gene.com/gmap/src/README 508 .. _README: http://research-pub.gene.com/gmap/src/README
509
510 </help> 509 </help>
510 <citations>
511 <citation type="doi">10.1093/bioinformatics/bti310</citation>
512 </citations>
511 </tool> 513 </tool>
512 514