comparison cmsearch.xml @ 1:26ef7e66c39b draft

Uploaded
author bgruening
date Sat, 31 Aug 2013 08:03:52 -0400
parents
children 468be760eba3
comparison
equal deleted inserted replaced
0:3d17c3197be5 1:26ef7e66c39b
1 <tool id="infernal_cmsearch" name="Search covariance model(s)" version="1.1.0">
2 <description>against a sequence database (cmsearch)</description>
3 <requirements>
4 <requirement type="package" version="1.1rc4">infernal</requirement>
5 </requirements>
6 <command>
7 cmsearch
8 --cpu 12
9 -o /dev/null
10 --tformat $seqdb.ext ## target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip
11 $bottomonly
12 $toponly
13 $cyk
14 $notrunc
15 $max
16 $nohmm
17 $mid
18 $bitscore_thresholds
19 --tblout $outfile
20 $g
21 #if str($A):
22 $A $multiple_alignment_output
23 #end if
24 $acyk
25 ## Inclusion threshold: exactly one of the two options is emitted, chosen by the selector.
26 #if $inclusion_thresholds_opts.inclusion_thresholds_selector == "--incE":
27 --incE $inclusion_thresholds_opts.incE
28 #else:
29 --incT $inclusion_thresholds_opts.incT
30 #end if
31 ## Reporting threshold: the E and T values are defined inside reporting_thresholds_opts.
32 #if $reporting_thresholds_opts.reporting_thresholds_selector == "-E":
33 -E $reporting_thresholds_opts.E
34 #else:
35 -T $reporting_thresholds_opts.T
36 #end if
37 ## Covariance model file: the path column of infernal.loc, or a dataset from the history.
38 #if $cm_opts.cm_opts_selector == "db":
39 $cm_opts.database.fields.path
40 #else:
41 $cm_opts.cmfile
42 #end if
43 ## The sequence database follows the model file as the last argument.
44 $seqdb
45
46 ## Redirect stderr to stdout.
47 2>&#38;1
48 </command>
49 <inputs>
50 <!-- Target sequences; the dataset extension is passed as the tformat value in the command. -->
51 <param name="seqdb" type="data" format="fasta,embl,genbank" label="Sequence database"/>
52 <!-- Covariance model source: an entry of infernal.loc or a dataset from the history. -->
53 <conditional name="cm_opts">
54 <param name="cm_opts_selector" type="select" label="Subject covariance models">
55 <option value="db" selected="True">Locally installed covariance models</option>
56 <option value="histdb">Covariance model from your history</option>
57 </param>
58 <when value="db">
59 <param name="database" type="select" label="Covariance models">
60 <options from_file="infernal.loc">
61 <column name="value" index="0"/>
62 <column name="name" index="1"/>
63 <column name="path" index="2"/>
64 </options>
65 </param>
66 </when>
67 <when value="histdb">
68 <param name="cmfile" type="data" format="fasta" label="Covariance models"/>
69 </when>
70 </conditional>
71 <!-- The boolean switches below contribute their truevalue to the command line when checked. -->
72 <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean"
73 label="Turn on the glocal alignment algorithm ..." help="... global with respect to the query model and local with respect to the target database."/>
74
75 <param name="bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean"
76 label="Only search the bottom (Crick) strand of target sequences" help="in the sequence database"/>
77 <param name="toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean"
78 label="Only search the top (Watson) strand of target sequences" help="in the sequence database"/>
79
80 <param name="cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
81 label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/>
82 <param name="acyk" truevalue="--acyk" falsevalue="" checked="False" type="boolean"
83 label="Use the CYK algorithm to align hits" help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/>
84
85 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
86 label="Turn off truncated hit detection" help=""/>
87
88 <!-- acceleration pipeline -->
89
90 <param name="max" truevalue="--max" falsevalue="" checked="False" type="boolean"
91 label="Turn off all filters, and run non-banded Inside on every full-length target sequence" help="This
92 increases sensitivity somewhat, at an extremely large cost in speed."/>
93
94 <param name="nohmm" truevalue="--nohmm" falsevalue="" checked="False" type="boolean"
95 label="Turn off all HMM filter stages " help=""/>
96
97 <param name="mid" truevalue="--mid" falsevalue="" checked="False" type="boolean"
98 label="Turn off the HMM SSV and Viterbi filter stages" help=""/>
99
100
101 <!-- Options for model-specific score thresholding -->
102
103 <param name="bitscore_thresholds" type="select" label="Bit score thresholds" help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone.">
104 <option value="" selected="true">None</option>
105 <option value="--cut_ga">GA (gathering) bit scores</option>
106 <option value="--cut_nc">NC (noise cutoff) bit score</option>
107 <option value="--cut_tc">TC (trusted cutoff) bit score</option>
108 </param>
109
110 <!-- Options for inclusion thresholds -->
111
112
113 <conditional name="inclusion_thresholds_opts">
114 <param name="inclusion_thresholds_selector" type="select" label="Inclusion thresholds"
115 help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output.">
116 <option value="--incE" selected="true">Use E-value</option>
117 <option value="--incT">Use bit score</option>
118 </param>
119 <when value="--incE">
120 <param name="incE" type="float" value="0.01" size="5" label="Use E-value" help="of &lt;= X as the hit inclusion threshold.">
121 <sanitizer>
122 <valid initial="string.printable">
123 <remove value="&apos;"/>
124 </valid>
125 </sanitizer>
126 </param>
127 </when>
128 <when value="--incT">
129 <param name="incT" type="integer" size="5" label="Use bit score" help="of >= X as the hit inclusion threshold.">
130 <sanitizer>
131 <valid initial="string.printable">
132 <remove value="&apos;"/>
133 </valid>
134 </sanitizer>
135 </param>
136 </when>
137 </conditional>
138
139
140 <!-- Options controlling reporting thresholds -->
141
142 <conditional name="reporting_thresholds_opts">
143 <param name="reporting_thresholds_selector" type="select" label="reporting thresholds"
144 help="Reporting thresholds control which hits are reported in output files">
145 <option value="-E" selected="true">Use E-value</option>
146 <option value="-T">Use bit score</option>
147 </param>
148 <when value="-E">
149 <param name="E" type="float" value="10.0" size="5" label="Use E-value" help="of &lt;= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise.">
150 <sanitizer>
151 <valid initial="string.printable">
152 <remove value="&apos;"/>
153 </valid>
154 </sanitizer>
155 </param>
156 </when>
157 <when value="-T">
158 <param name="T" type="integer" size="5" label="Use bit score" help="of >= X as the hit reporting threshold.">
159 <sanitizer>
160 <valid initial="string.printable">
161 <remove value="&apos;"/>
162 </valid>
163 </sanitizer>
164 </param>
165 </when>
166 </conditional>
167
168 <param name="A" truevalue="-A" falsevalue="" checked="False" type="boolean"
169 label="Save a multiple alignment of all significant hits ..." help="... those satisfying inclusion thresholds"/>
170
171
172 </inputs>
173 <outputs>
174 <!-- outfile receives the tblout table; the alignment dataset is only created when A is checked. -->
175 <data format="tabular" name="outfile" label="cmsearch on ${on_string}"/>
176 <data format="tabular" name="multiple_alignment_output" label="cmsearch on ${on_string} (multi alignment)">
177 <filter>A is True</filter>
178 </data>
179
180 </outputs>
181
182
183 <help>
184
185 **What it does**
186
187 Options for model-specific score thresholding
188 ---------------------------------------------
189
190
191 - GA thresholds are generally considered to be the reliable curated thresholds defining family membership; for example, in Rfam, these thresholds define what gets included in Rfam Full alignments based on searches with Rfam Seed models.
192 - NC thresholds are generally considered to be the score of the highest-scoring known false positive.
193 - TC thresholds are generally considered to be the score of the lowest-scoring known true positive that is above all known false positives.
194
195
196 -g Turn on the glocal alignment algorithm, global with respect to the query model and local with
197 respect to the target database. By default, the local alignment algorithm is used which is
198 local with respect to both the target sequence and the model. In local mode, the alignment to
199 span two or more subsequences if necessary (e.g. if the structures of the query model and
200 target sequence are only partially shared), allowing certain large insertions and deletions
201 in the structure to be penalized differently than normal indels. Local mode performs better
202 on empirical benchmarks and is significantly more sensitive for remote homology detection.
203 Empirically, glocal searches return many fewer hits than local searches, so glocal may be
204 desired for some applications. With -g, all models must be calibrated, even those with zero
205 basepairs.
206
207
208
209
210 </help>
211 </tool>