<!--
    Galaxy wrapper for Glimmer's `build-icm`.

    The command passes the output dataset path as the positional argument and
    feeds the FASTA training set to the program on stdin; stderr is merged
    into stdout.
-->
<tool id="glimmer_build-icm" name="Glimmer ICM builder" version="0.2">
    <description></description>
    <requirements>
        <requirement type="package" version="3.02b">glimmer</requirement>
    </requirements>
    <!-- CDATA keeps the shell redirections (`<`, `2>&1`) legal inside XML text. -->
    <command><![CDATA[
        build-icm
            --depth $depth
            #if $no_stops:
                --no_stops
            #end if
            --period $period
            --width $width

            #if $stop_codon_opts.stop_codon_opts_selector == "gb":
                --trans_table "${stop_codon_opts.genbank_gencode}"
            #else:
                --stop_codons "${stop_codon_opts.stop_codons}"
            #end if

            '$outfile' < '$infile' 2>&1
    ]]></command>
    <inputs>
        <param name="infile" type="data" format="fasta" label="Training dataset" help="A set of known genes in FASTA format." />
        <param name="depth" type="integer" value="7" label="Set the depth of the ICM" help="The depth is the maximum number of positions in the context window that will be used to determine the probability of the predicted position." />
        <param name="period" type="integer" value="3" label="Set the period of the ICM" help="The period is the number of different submodels for different positions in the text in a cyclic pattern. E.g., if the period is 3, the first submodel will determine positions 1, 4, 7, ..." />
        <param name="width" type="integer" value="12" label="Set the width of the ICM" help="The width includes the predicted position." />
        <param name="no_stops" type="boolean" truevalue="--no_stops" falsevalue="" checked="false" label="Do not use any input strings with in-frame stop codons" />

        <!--
            Stop codons are given either as a GenBank translation table number
            (emitted as the trans_table option) or as a free-form comma-separated
            list (emitted as the stop_codons option).
        -->
        <conditional name="stop_codon_opts">
            <param name="stop_codon_opts_selector" type="select" label="Specify stop codons as">
                <option value="gb" selected="True">Genbank translation table entry</option>
                <option value="free_form">Comma-separated list</option>
            </param>
            <when value="gb">
                <param name="genbank_gencode" type="select" label="Use Genbank translation table to specify stop codons">
                    <option value="1" selected="True">1. Standard</option>
                    <option value="2">2. Vertebrate Mitochondrial</option>
                    <option value="3">3. Yeast Mitochondrial</option>
                    <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
                    <option value="5">5. Invertebrate Mitochondrial</option>
                    <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
                    <option value="9">9. Echinoderm Mitochondrial</option>
                    <option value="10">10. Euplotid Nuclear</option>
                    <option value="11">11. Bacteria and Archaea</option>
                    <option value="12">12. Alternative Yeast Nuclear</option>
                    <option value="13">13. Ascidian Mitochondrial</option>
                    <option value="14">14. Flatworm Mitochondrial</option>
                    <option value="15">15. Blepharisma Macronuclear</option>
                    <option value="16">16. Chlorophycean Mitochondrial</option>
                    <option value="21">21. Trematode Mitochondrial</option>
                    <option value="22">22. Scenedesmus obliquus mitochondrial</option>
                    <option value="23">23. Thraustochytrium Mitochondrial</option>
                    <option value="24">24. Pterobranchia mitochondrial</option>
                </param>
            </when>
            <when value="free_form">
                <param name="stop_codons" type="text" value="tag,tga,taa" label="Specify stop codons as a comma-separated list" />
            </when>
        </conditional>

    </inputs>
    <outputs>
        <data format="data" name="outfile" />
    </outputs>
    <tests>
        <test>
            <param name="infile" value='glimmer3/seqTest.fa'/>
            <output name="outfile" file='glimmer3/buildICMTestOutput.dat'/>
        </test>
    </tests>

    <help>

**What it does**

This program constructs an interpolated context model (ICM) from an input set of sequences.

This model can be used by Glimmer3 to predict genes.

**TIP** To extract CDS from a GenBank file use the tool *Extract ORF from a GenBank file*.

-----

**Example**

*Input*::

    - Genome Sequence

    >CELF22B7 Caenorhabditis elegans (Bristol N2) cosmid F22B7
    GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
    GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
    TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
    TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
    GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
    ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
    AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
    CAATTCCTGATTTACGAACATCTTCTTCAAGCATTCTACAGATTTCTTGA
    TGCTCTTCTAGGAGGATGTTGAAATCCGAAGTTGGAGAAAAAGTTCTCTC
    AACTGAAATGCTTTTTCTTCGTGGATCCGATTCAGATGGACGACCTGGCA
    GTCCGAGAGCCGTTCGAAGGAAAGATTCTTGTGAGAGAGGCGTGAAACAC
    AAAGGGTATAGGTTCTTCTTCAGATTCATATCACCAACAGTTTGAATATC
    CATTGCTTTCAGTTGAGCTTCGCATACACGACCAATTCCTCCAACCTAAA
    AAATTATCTAGGTAAAACTAGAAGGTTATGCTTTAATAGTCTCACCTTAC
    GAATCGGTAAATCCTTCAAAAACTCCATAATCGCGTTTTTATCATTTTCT
    .....

*Output*::

    interpolated context model (ICM)

-------

**References**

A.L. Delcher, K.A. Bratke, E.C. Powers, and S.L. Salzberg. Identifying bacterial genes and endosymbiont DNA with Glimmer. Bioinformatics 23(6):673-679 (2007).


    </help>
</tool>