annotate cmbuild.xml @ 11:5c6344f67ad0 draft

Uploaded
author bgruening
date Sun, 08 Sep 2013 06:48:18 -0400
parents 0bffd4183326
children c17ed5a530d2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.0">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
2 <description>from sequence alignments (cmbuild)</description>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
4 <requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package">infernal</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
6 <requirement type="package" version="1.1rc4">infernal</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
7 <requirement type="package" version="8.21">gnu_coreutils</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
8 </requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
9 <command>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
10 cmbuild
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
11 #if $is_summery_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
12 -o $summary_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
13 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
14
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
15 ## too many outputs, is that one really needed?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
16 ##-O $annotated_source_alignment_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
17
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
18 $model_construction_opts.model_construction_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
19 #if str($model_construction_opts.model_construction_opts_selector) == '--fast':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
20 --symfrac $model_construction_opts.symfrac
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
21 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
22
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
23 $noss
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
24
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
25 $relative_weights_opts.relative_weights_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
26 #if str($relative_weights_opts.relative_weights_opts_selector) == '--wblosum':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
27 --wid $relative_weights_opts.wid
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
28 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
29
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
30 $effective_opts.effective_opts_selector
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
31 #if str($effective_opts.effective_opts_selector) == '--eent':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
32 --ere $effective_opts.ere
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
33 --eminseq $effective_opts.eminseq
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
34 --ehmmre $effective_opts.ehmmre
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
35 --eset $effective_opts.eset
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
36 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
37
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
38 #if str($refining_opts.refining_opts_selector) == '--refine':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
39 #if $refining_opts.refine_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
40 --refine $refined_multiple_alignment_output
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
41 #else:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
42 --refine /dev/null
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
43 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
44
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
45 $refining_opts.l
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
46 $refining_opts.gibbs_opts.gibbs_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
47
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
48 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
49 --seed $refining_opts.gibbs_opts.random_seed
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
50 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
51
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
52 $refining_opts.notrunc
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
53 $refining_opts.cyk
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
54 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
55
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
56 $cmfile_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
57 $alignment_infile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
58
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
59 </command>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
60 <inputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
61 <!-- Stockholm or SELEX
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
62 SELEX is defined in EMBOSS datatypes
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
63 -->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
64 <param name="alignment_infile" type="data" format="stockholm,selex" label="Sequence database"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
65
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
66 <conditional name="model_construction_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
67 <param name="model_construction_opts_selector" type="select" label="These options control how consensus columns are defined in an alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
68 <option value="--fast" selected="true">automatic (--fast)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
69 <option value="--hand">user defined (--hand)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
70 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
71 <when value="--fast">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
72 <param name="symfrac" type="float" value="0.5" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
73 label="Define the residue fraction threshold necessary to define a consensus (--symfrac)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
74 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
75 <when value="--hand"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
76 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
77
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
78 <param name="noss" truevalue="--noss" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
79 label="Ignore the secondary structure annotation, if any, in your multiple alignment file (--noss)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
80
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
81 <conditional name="relative_weights_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
82 <param name="relative_weights_opts_selector" type="select" label="Options controlling relative weights" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
83 <option value="--wpb" selected="true">Henikoff (--wpb)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
84 <option value="--wgsc">Gerstein/Sonnhammer/Chothia (--wgsc)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
85 <option value="--wnone">no sequence weighting (--wnone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
86 <option value="--wgiven">Sequence weight from given in input file (--wgiven)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
87 <option value="--wblosum">BLOSUM filtering algorithm (--wblosum)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
88 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
89 <when value="--wpb"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
90 <when value="--wgsc"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
91 <when value="--wnone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
92 <when value="--wgiven"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
93 <when value="--wblosum">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
94 <param name="wid" type="float" value="0.5" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
95 label="Percent identity for clustering the alignment (--wid)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
96 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
97 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
98
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
99
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
100 <conditional name="effective_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
101 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
102 <option value="--eent" selected="true">entropy weighting strategy (--eent)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
103 <option value="--enone">Turn off the entropy weighting strategy (--enone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
104 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
105 <when value="--enone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
106 <when value="--eent">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
107 <param name="ere" type="float" value="0.59" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
108 label="Set the target mean match state relative entropy (--ere)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
109
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
110 <param name="eminseq" type="integer" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
111 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
112
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
113 <param name="ehmmre" type="float" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
114 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
115
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
116 <param name="eset" type="integer" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
117 label="Set the effective sequence number for entropy weighting (--eset)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
118 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
119 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
120
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
121
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
122 <conditional name="refining_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
123 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
124 <option value="" selected="true">No refinement</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
125 <option value="--refine">refine the input alignment</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
126 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
127 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
128 <when value="--refine">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
129
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
130 <conditional name="gibbs_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
131 <param name="gibbs_opts_selector" type="select" label="refinement mode" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
132 <option value="" selected="true">expectation-maximization (EM)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
133 <option value="--gibbs">Gibbs sampling</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
134 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
135 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
136 <when value="--gibbs">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
137 <param name="random_seed" type="integer" value="0" label="Random Seed" help="" />
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
138 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
139 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
140
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
141 <param name="l" truevalue="-l" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
142 label="Turn on the local alignment algorithm" help="... which allows the alignment to span two or more subsequences if necessary"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
143
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
144 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
145 label="Turn off the truncated alignment algorithm" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
146
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
147 <param name="cyk" type="select" label="Options for refining the input alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
148 <option value="" selected="true">optimal accuracy algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
149 <option value="--cyk">align with the CYK algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
150 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
151
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
152 <param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
153 label="Output the refined alignment file as it is used to build the covariance model" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
154
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
155 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
156 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
157
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
158
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
159 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
160 label="Output a summary file?" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
161
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
162 </inputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
163 <outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
164
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
165 <data format="text" name="summary_outfile" label="cmbuild summary on ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
166 <filter>is_summery_output is True</filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
167 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
168 <!--<data format="stockholm" name="annotated_source_alignment_outfile" label="Annotated alignment from ${on_string}"/>-->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
169 <data format="cm" name="cmfile_outfile" label="Covariance models from ${on_string}"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
170
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
171 <data format="stockholm" name="refined_multiple_alignment_output" label="refined alignment file of ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
172 <filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
173 ((
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
174 refining_opts['refining_opts_selector'] == "--refine" and
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
175 refining_opts['refine_output'] is True
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
176 ))
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
177 </filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
178 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
179
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
180 </outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
181 <help>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
182
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
183
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
184 **What it does**
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
185
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
186 For each multiple sequence alignment build a covariance model.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
187 The alignment file must be in Stockholm or SELEX format, and must contain consensus secondary structure annotation.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
188 cmbuild uses the consensus structure to determine the architecture of the CM.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
189
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
190 In addition to writing CM(s) to CMFILE_OUT, cmbuild also outputs a single line for each model created to stdout. Each
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
191 line has the following fields: "aln": the index of the alignment used to build the CM; "idx": the index of the CM in the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
192 CMFILE_OUT; "name": the name of the CM; "nseq": the number of sequences in the alignment used to build the CM;
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
193 "eff nseq": the effective number of sequences used to build the model; "alen": the length of the alignment used to build
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
194 the CM; "clen": the number of columns from the alignment defined as consensus (match) columns; "bps": the number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
195 of basepairs in the CM; "bifs": the number of bifurcations in the CM; "rel entropy: CM": the total relative entropy of the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
196 model divided by the number of consensus columns; "rel entropy: HMM": the total relative entropy of the model ignoring
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
197 secondary structure divided by the number of consensus columns. "description": description of the model/alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
198
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
199
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
200 Options controlling model construction
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
201 --------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
202
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
203 These options control how consensus columns are defined in an alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
204
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
205 * --fast Define consensus columns automatically as those that have a fraction >= symfrac of residues as opposed to gaps. (See below for the --symfrac option.) This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
206 * --hand Use reference coordinate annotation (#=GC RF line, in Stockholm) to determine which columns are consensus, and which are inserts. Any non-gap character indicates a consensus column. (For example, mark consensus columns with "x", and insert columns with ".".)
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
207 * --symfrac Define the residue fraction threshold necessary to define a consensus column when not using --hand. The default is 0.5. The symbol fraction in each column is calculated after taking relative sequence weighting into account. Setting this to 0.0 means that every alignment column will be assigned as consensus, which may be useful in some cases. Setting it to 1.0 means that only columns that include 0 gaps will be assigned as consensus.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
208 * --noss Ignore the secondary structure annotation, if any, in MSA-Infile and build a CM with zero basepairs. This model will be similar to a profile HMM and the cmsearch and cmscan programs will use HMM algorithms which are faster than CM ones for this model. Additionally, a zero basepair model need not be calibrated with cmcalibrate prior to running cmsearch with it. The --noss option must be used if there is no secondary structure annotation in MSA-Infile.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
209
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
210
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
211 Options controlling relative weights
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
212 ------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
213
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
214 cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
215 related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example,
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
216 two identical sequences would typically each receive half the weight that one sequence would. These options control
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
217 which algorithm gets used.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
218
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
219 * --wpb Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
220 * --wgsc Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
221 * --wnone Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
222 * --wgiven Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
223 * --wblosum Use the BLOSUM filtering algorithm to weight the sequences, instead of the default GSC weighting. Cluster the sequences at a given percentage identity (see --wid); assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
224 * --wid Controls the behavior of the --wblosum weighting option by setting the percent identity for clustering the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
225
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
226
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
227 Options controlling effective sequence number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
228 ---------------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
229
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
230 After relative weights are determined, they are normalized to sum to a total effective sequence number, eff nseq. This
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
231 number may be the actual number of sequences in the alignment, but it is almost always smaller than that. The default
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
232 entropy weighting method (--eent) reduces the effective sequence number to reduce the information content (relative
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
233 entropy, or average expected score on true homologs) per consensus position. The target relative entropy is controlled
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
234 by a two-parameter function, where the two parameters are settable with --ere and --esigma.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
235
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
236 * --eent Use the entropy weighting strategy to determine the effective sequence number that gives a target mean match state relative entropy. This option is the default, and can be turned off with --enone. The default target mean match state relative entropy is 0.59 bits for models with at least 1 basepair and 0.38 bits for models with zero basepairs, but can be changed with --ere. The default of 0.59 or 0.38 bits is automatically changed if the total relative entropy of the model (summed match state relative entropy) is less than a cutoff, which is 6.0 bits by default, but can be changed with the expert, undocumented --eX option. If you really want to play with that option, consult the source code.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
237 * --enone Turn off the entropy weighting strategy. The effective sequence number is just the number of sequences in the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
238 * --ere Set the target mean match state relative entropy. By default the target relative entropy per match position is 0.59 bits for models with at least 1 basepair and 0.38 for models with zero basepairs.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
239 * --eminseq Define the minimum allowed effective sequence number.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
240 * --ehmmre Set the target HMM mean match state relative entropy. Entropy for basepairing match states is calculated using marginalized basepair emission probabilities.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
241 * --eset Set the effective sequence number for entropy weighting.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
242
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
243
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
244
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
245 Options for refining the input alignment
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
246 ----------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
247
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
248 * --refine Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations).
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
249 * -l Turn on the local alignment algorithm, which allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
250 * --gibbs Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal, instead an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
251 * --seed Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
252 * --cyk With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
253 * --notrunc With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
254
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
255
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
256 For further questions please refer to the Infernal Userguide_.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
257
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
258 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
259
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
260
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
261 How do I cite Infernal?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
262 -----------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
263
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
264 The Infernal 1.0 paper (Nawrocki et al., 2009) is the best paper to reference.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
265 If you’re writing for an enlightened (url-friendly) journal, you may want to cite the webpage
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
266 http://infernal.janelia.org/ because it is kept up-to-date. We hope to publish a paper related to
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
267 Infernal version 1.1 soon.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
268
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
269 **Galaxy Wrapper Author**::
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
270
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
271 * Bjoern Gruening, University of Freiburg
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
272
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
273 </help>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
274 </tool>