comparison gmap.xml @ 0:10e3476429b5 draft

Uploaded
author jjohnson
date Fri, 05 Oct 2012 13:51:49 -0400
parents
children 74391fc6e3f2
comparison
equal deleted inserted replaced
-1:000000000000 0:10e3476429b5
1 <tool id="gmap" name="GMAP" version="2.0.1">
2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
3 <requirements>
4 <requirement type="binary">gmap</requirement>
5 </requirements>
6 <version_string>gmap --version</version_string>
7 <command>
8 #import os,os.path
9 gmap
10 --nthreads=4 --ordered
11 #if $refGenomeSource.genomeSource == "history":
12 --gseg=$refGenomeSource.ownFile
13 #elif $refGenomeSource.genomeSource == "gmapdb":
14 #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
15 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
16 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
17 --kmer=$refGenomeSource.kmer
18 #end if
19 #else:
20 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
21 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
22 --kmer=$refGenomeSource.kmer
23 #end if
24 #end if
25 #if $result.format == "summary":
26 --summary
27 #elif $result.format == "align":
28 --align
29 #elif $result.format == "continuous":
30 --continuous
31 #elif $result.format == "continuous-by-exon":
32 --continuous-by-exon
33 #elif $result.format == "compress":
34 --compress
35 #elif $result.format == "exons_dna":
36 --exons=cdna
37 #elif $result.format == "exons_gen":
38 --exons=genomic
39 #elif $result.format == "protein_dna":
40 --protein_dna
41 #elif $result.format == "protein_gen":
42 --protein_gen
43 #elif $result.format == "sam":
44 --format=$result.sam_paired_read
45 $result.no_sam_headers
46 #* Removed in gmap version 2011-11-30
47 #if len($result.noncanonical_splices.__str__) > 0
48 --noncanonical-splices=$result.noncanonical_splices
49 #end if
50 *#
51 #if len($result.read_group_id.__str__) > 0
52 --read-group-id=$result.read_group_id
53 #end if
54 #if len($result.read_group_name.__str__) > 0
55 --read-group-name=$result.read_group_name
56 #end if
57 #if len($result.read_group_library.__str__) > 0
58 --read-group-library=$result.read_group_library
59 #end if
60 #if len($result.read_group_platform.__str__) > 0
61 --read-group-platform=$result.read_group_platform
62 #end if
63 #elif $result.format != "gmap":
64 --format=$result.format
65 #end if
66 #if $computation.options == "advanced":
67 $computation.nosplicing
68 $computation.cross_species
69 #if len($computation.min_intronlength.__str__) > 0
70 --min-intronlength=$computation.min_intronlength
71 #end if
72 #if len($computation.intronlength.__str__) > 0
73 --intronlength=$computation.intronlength
74 #end if
75 #if len($computation.localsplicedist.__str__) > 0
76 --localsplicedist=$computation.localsplicedist
77 #end if
78 #if len($computation.totallength.__str__) > 0
79 --totallength=$computation.totallength
80 #end if
81 #if len($computation.trimendexons.__str__) > 0
82 --trimendexons=$computation.trimendexons
83 #end if
84 --direction=$computation.direction
85 --canonical-mode=$computation.canonical
86 --prunelevel=$computation.prunelevel
87 --allow-close-indels=$computation.allow_close_indels
88 #if len($computation.microexon_spliceprob.__str__) > 0:
89 --microexon-spliceprob=$computation.microexon_spliceprob
90 #end if
91 #if len($computation.chimera_margin.__str__) > 0:
92 --chimera-margin=$computation.chimera_margin
93 #end if
94 #end if
95 #if $advanced.options == "used":
96 #if len($advanced.npaths.__str__) > 0:
97 --npaths=$advanced.npaths
98 #end if
99 #if len($advanced.suboptimal_score.__str__) > 0:
100 --suboptimal-score=$advanced.suboptimal_score
101 #end if
102 #if len($advanced.chimera_overlap.__str__) > 0:
103 --chimera-overlap=$advanced.chimera_overlap
104 #end if
105 $advanced.protein
106 $advanced.tolerant
107 $advanced.nolengths
108 $advanced.invertmode
109 #if len($advanced.introngap.__str__) > 0:
110 --introngap=$advanced.introngap
111 #end if
112 #if len($advanced.wraplength.__str__) > 0:
113 --wraplength=$advanced.wraplength
114 #end if
115 #end if
116 #if $split_output == True
117 $split_output
118 #end if
119 #if len($quality_protocol.__str__) > 0:
120 --quality-protocol=$quality_protocol
121 #end if
122 $input
123 #for $i in $inputs:
124 ${i.added_input}
125 #end for
126 #if $split_output == True
127 2> $gmap_stderr
128 #else
129 2> $gmap_stderr > $output
130 #end if
131 </command>
132 <inputs>
133 <!-- Input data -->
134 <param name="input" type="data" format="fasta,fastqsanger,fastqillumina" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select an mRNA or EST dataset to map" />
135 <repeat name="inputs" title="additional mRNA or EST dataset to map">
136 <param name="added_input" type="data" format="fasta,fastqsanger,fastqillumina" label=""/>
137 </repeat>
138 <param name="quality_protocol" type="select" label="Protocol for input quality scores">
139 <option value="">No quality scores</option>
140 <option value="sanger">Sanger quality scores</option>
141 <option value="illumina">Illumina quality scores</option>
142 </param>
143
144 <!-- GMAPDB for mapping -->
145 <conditional name="refGenomeSource">
146 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Map To&lt;/H2&gt;Will you map to a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
147 <option value="indexed">Use a built-in index</option>
148 <option value="gmapdb">Use gmapdb from the history</option>
149 <option value="history">Use a fasta reference sequence from the history</option>
150 </param>
151 <when value="indexed">
152 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
153 <options from_file="gmap_indices.loc">
154 <column name="uid" index="0" />
155 <column name="dbkey" index="1" />
156 <column name="name" index="2" />
157 <column name="kmers" index="3" />
158 <column name="maps" index="4" />
159 <column name="snps" index="5" />
160 <column name="value" index="6" />
161 </options>
162 </param>
163 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
164 <options from_file="gmap_indices.loc">
165 <column name="name" index="3"/>
166 <column name="value" index="3"/>
167 <filter type="param_value" ref="gmapindex" column="6"/>
168 <filter type="multiple_splitter" column="3" separator=","/>
169 <filter type="add_value" name="" value=""/>
170 <filter type="sort_by" column="3"/>
171 </options>
172 </param>
173 <param name="map" type="select" data_ref="gmapindex" label="Look for splicing involving known sites or known introns" help="">
174 <options from_file="gmap_indices.loc">
175 <column name="name" index="4"/>
176 <column name="value" index="4"/>
177 <filter type="param_value" ref="gmapindex" column="6"/>
178 <filter type="multiple_splitter" column="4" separator=","/>
179 <filter type="add_value" name="" value=""/>
180 <filter type="sort_by" column="4"/>
181 </options>
182 </param>
183 </when>
184 <when value="gmapdb">
185 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
186 help="A GMAP database built with GMAP Build"/>
187 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
188 <options>
189 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
190 </options>
191 </param>
192 <param name="map" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
193 <options>
194 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
195 </options>
196 </param>
197 </when>
198 <when value="history">
199 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome"
200 help="Fasta containing genomic DNA sequence"/>
201 </when>
202 </conditional>
203
204
205 <!-- Computation options -->
206 <conditional name="computation">
207 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
208 <option value="default">Use default settings</option>
209 <option value="advanced">Set Computation Options</option>
210 </param>
211 <when value="default"/>
212 <when value="advanced">
213 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
214 <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." >
215 <validator type="in_range" message="min_intronlength must be positive" min="0" />
216 </param>
217 <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" >
218 <validator type="in_range" message="intronlength must be positive" min="0" />
219 </param>
220 <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" >
221 <validator type="in_range" message="localsplicedist must be positive" min="0" />
222 </param>
223 <param name="totallength" type="integer" value="" optional="true" label="Max total intron length (default 2400000)" >
224 <validator type="in_range" message="totallength must be positive" min="0" />
225 </param>
226 <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera"
227 help=" default is 40, To turn off, set to a large value (greater than the query length)" >
228 <validator type="in_range" message="chimera_margin must be positive" min="0" />
229 </param>
230 <param name="direction" type="select" label="cDNA direction">
231 <option value="auto">auto</option>
232 <option value="sense_force">sense_force</option>
233 <option value="antisense_force">antisense_force</option>
234 <option value="sense_filter">sense_filter</option>
235 <option value="antisense_filter">antisense_filter</option>
236 </param>
237 <param name="trimendexons" type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >
238 <validator type="in_range" message="trimendexons must be positive" min="1" />
239 </param>
240 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
241
242 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns">
243 <option value="1">high reward (default)</option>
244 <option value="0">low reward</option>
245 <option value="2">low reward for high-identity sequences</option>
246 </param>
247 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other">
248 <option value="1" selected="true">yes (default)</option>
249 <option value="0">no</option>
250 <option value="2">only for high-quality alignments</option>
251 </param>
252 <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probability threshold"
253 help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
254 <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
255 </param>
256 <param name="prunelevel" type="select" label="Pruning level">
257 <option value="0">no pruning (default)</option>
258 <option value="1">poor sequences</option>
259 <option value="2">repetitive sequences</option>
260 <option value="3">poor and repetitive sequences</option>
261 </param>
262 <!-- could do this as a config file
263 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
264 <param name="chrsubset" type="text" label="Chromosome subset to search" />
265 -->
266 </when>
267 </conditional>
268
269 <!-- Advanced Settings -->
270 <conditional name="advanced">
271 <param name="options" type="select" label="&lt;HR&gt;Advanced Settings" help="">
272 <option value="default">Use default settings</option>
273 <option value="used">Set Options</option>
274 </param>
275 <when value="default"/>
276 <when value="used">
277 <param name="nolengths" type="boolean" checked="false" truevalue="--nolengths=true" falsevalue="" label="No intron lengths in alignment"/>
278 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help="">
279 <option value="">Don't invert the cDNA (default)</option>
280 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
281 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
282 </param>
283 <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)">
284 <validator type="in_range" message="introngap must be positive" min="0" />
285 </param>
286 <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)">
287 <validator type="in_range" message="wraplength must be positive" min="1" />
288 </param>
289 <param name="npaths" type="integer" value="" optional="true"
290 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." >
291 <validator type="in_range" message="npaths must be positive" min="0" />
292 </param>
293 <param name="suboptimal_score" type="integer" value="" optional="true"
294 label="Report only paths whose score is within this value of the best path"
295 help="By default the program prints all paths found." >
296 <validator type="in_range" message="suboptimal_score must be positive" min="0" />
297 </param>
298 <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" >
299 <validator type="in_range" message="chimera_overlap must be positive" min="0" />
300 </param>
301 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
302 label="Translates cDNA with corrections for frameshifts"/>
303 <param name="protein" type="select" label="Protein alignment" help="">
304 <option value="">default</option>
305 <option value="--fulllength=true">Assume full-length protein, starting with Met</option>
306 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option>
307 </param>
308 </when>
309 </conditional>
310
311 <!-- Output data -->
312 <conditional name="result">
313 <param name="format" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Select the output format" help="">
314 <option value="gmap">GMAP default output</option>
315 <option value="summary">Summary of alignments</option>
316 <option value="align">Alignment</option>
317 <option value="continuous">Alignment in three continuous lines</option>
318 <option value="continuous-by-exon">Alignment in three lines per exon</option>
319 <option value="compress">Print output in compressed format</option>
320 <option value="exons_dna">Print exons cDNA</option>
321 <option value="exons_gen">Print exons genomic</option>
322 <option value="protein_dna">Print protein sequence (cDNA)</option>
323 <option value="protein_gen">Print protein sequence (genomic)</option>
324 <option value="psl">PSL (BLAT) format</option>
325 <option value="gff3_gene">GFF3 gene format</option>
326 <option value="gff3_match_cdna">GFF3 match cDNA format</option>
327 <option value="gff3_match_est">GFF3 match EST format</option>
328 <option value="splicesites">splicesites output (for GSNAP)</option>
329 <option value="introns">introns output (for GSNAP)</option>
330 <option value="map_exons">IIT FASTA exon map format</option>
331 <option value="map_genes">IIT FASTA map format</option>
332 <option value="coords">coords in table format</option>
333 <option value="sam" selected="true">SAM format</option>
334 </param>
335 <when value="gmap">
336 </when>
337 <when value="summary"/>
338 <when value="align">
339 </when>
340 <when value="continuous">
341 </when>
342 <when value="continuous-by-exon">
343 </when>
344 <when value="compress"/>
345 <when value="exons_dna"/>
346 <when value="exons_gen"/>
347 <when value="protein_dna"/>
348 <when value="protein_gen"/>
349 <when value="psl"/>
350 <when value="gff3_gene"/>
351 <when value="gff3_match_cdna"/>
352 <when value="gff3_match_est"/>
353 <when value="splicesites"/>
354 <when value="introns"/>
355 <when value="map_exons"/>
356 <when value="map_genes"/>
357 <when value="coords"/>
358 <when value="sam">
359 <param name="sam_paired_read" type="boolean" truevalue="sampe" falsevalue="samse" checked="false" label="SAM paired reads"/>
360 <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
361 <!-- Removed in gmap version 2011-11-30
362 <param name="noncanonical_splices" type="select" label="Print non-canonical genomic gaps greater than 20 nt in CIGAR string as STRING.">
363 <option value="">Use default</option>
364 <option value="N">N</option>
365 <option value="D">D</option>
366 </param>
367 -->
368 <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/>
369 <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/>
370 <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
371 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
372 </when>
373 </conditional> <!-- name="result" -->
374
375 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/>
376
377
378 <!--
379 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files.
380 mapexons Map each exon separately
381 mapboth Report hits from both strands of genome
382 flanking=INT Show flanking hits (default 0)
383 print-comment Show comment line for each hit
384 -->
385
386
387 </inputs>
388 <outputs>
389 <data format="txt" name="gmap_stderr" label="${tool.name} on ${on_string}: stderr"/>
390 <data format="txt" name="output" label="${tool.name} on ${on_string} ${result.format}" >
391 <filter>(split_output == False)</filter>
392 <change_format>
393 <when input="result['format']" value="gff3_gene" format="gff3"/>
394 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
395 <when input="result['format']" value="gff3_match_est" format="gff3"/>
396 <when input="result['format']" value="sam" format="sam"/>
397 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
398 <when input="result['format']" value="introns" format="gmap_introns"/>
399 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
400 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
401 </change_format>
402 </data>
403 <data format="txt" name="uniq" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gmap_out.uniq">
404 <filter>(split_output == True)</filter>
405 <change_format>
406 <when input="result['format']" value="gff3_gene" format="gff3"/>
407 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
408 <when input="result['format']" value="gff3_match_est" format="gff3"/>
409 <when input="result['format']" value="sam" format="sam"/>
410 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
411 <when input="result['format']" value="introns" format="gmap_introns"/>
412 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
413 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
414 </change_format>
415 </data>
416 <data format="txt" name="transloc" label="${tool.name} on ${on_string} transloc.${result.format}" from_work_dir="gmap_out.transloc">
417 <filter>(split_output == True)</filter>
418 <change_format>
419 <when input="result['format']" value="gff3_gene" format="gff3"/>
420 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
421 <when input="result['format']" value="gff3_match_est" format="gff3"/>
422 <when input="result['format']" value="sam" format="sam"/>
423 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
424 <when input="result['format']" value="introns" format="gmap_introns"/>
425 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
426 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
427 </change_format>
428 </data>
429 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gmap_out.nomapping">
430 <filter>(split_output == True)</filter>
431 <change_format>
432 <when input="result['format']" value="gff3_gene" format="gff3"/>
433 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
434 <when input="result['format']" value="gff3_match_est" format="gff3"/>
435 <when input="result['format']" value="sam" format="sam"/>
436 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
437 <when input="result['format']" value="introns" format="gmap_introns"/>
438 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
439 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
440 </change_format>
441 </data>
442 <data format="txt" name="mult" label="${tool.name} on ${on_string} mult.${result.format}" from_work_dir="gmap_out.mult">
443 <filter>(split_output == True)</filter>
444 <change_format>
445 <when input="result['format']" value="gff3_gene" format="gff3"/>
446 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
447 <when input="result['format']" value="gff3_match_est" format="gff3"/>
448 <when input="result['format']" value="sam" format="sam"/>
449 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
450 <when input="result['format']" value="introns" format="gmap_introns"/>
451 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
452 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
453 </change_format>
454 </data>
455 </outputs>
456 <tests>
457 </tests>
458
459 <help>
460
461 **What it does**
462
463 GMAP_ (Genomic Mapping and Alignment Program) maps and aligns mRNA and EST sequences to a genome. The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains. It is developed by Thomas D. Wu of Genentech, Inc.
464
465 Publication_ citation: Thomas D. Wu and Colin K. Watanabe, Bioinformatics 2005, 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
466
467 .. _GMAP: http://research-pub.gene.com/gmap/
468 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
469
470 ------
471
472 **Know what you are doing**
473
474 .. class:: warningmark
475
476 You will want to read the README_
477
478 .. _README: http://research-pub.gene.com/gmap/src/README
479
480 </help>
481 </tool>
482