annotate cmbuild.xml @ 9:1aa4c036e41c draft

Uploaded
author bgruening
date Sat, 07 Sep 2013 16:14:02 -0400
parents
children 0bffd4183326
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.0">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
2 <description>from sequence alignments (cmbuild)</description>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
4 <requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package">infernal</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
6 <requirement type="package" version="1.1rc4">infernal</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
7 <requirement type="package" version="8.21">gnu_coreutils</requirement>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
8 </requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
9 <command>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
10 cmbuild
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
11 #if $is_summery_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
12 -o $summary_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
13 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
14
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
15 ## too many outputs, is that one really needed?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
16 ##-O $annotated_source_alignment_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
17
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
18 $model_construction_opts.model_construction_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
19 #if $model_construction_opts.model_construction_opts_selector == '--fast':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
20 --symfrac $model_construction_opts.symfrac
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
21 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
22
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
23 $noss
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
24
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
25 $relative_weights_opts.relative_weights_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
26 #if $relative_weights_opts.relative_weights_opts_selector == '--wblosum':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
27 --wid $relative_weights_opts.wid
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
28 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
29
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
30 $effective_opts.effective_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
31 #if $effective_opts.effective_opts_selector == '--eent':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
32 --ere $effective_opts.ere
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
33 --eminseq $effective_opts.eminseq
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
34 --ehmmre $effective_opts.ehmmre
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
35 --eset $effective_opts.eset
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
36 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
37
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
38 #if $refining_opts.refining_opts_selector == '--refine':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
39 #if $refining_opts.refine_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
40 --refine $refined_multiple_alignment_output
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
41 #else:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
42 --refine /dev/null
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
43 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
44
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
45 $refining_opts.l
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
46 $refining_opts.gibbs_opts.gibbs_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
47
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
48 #if $refining_opts.gibbs_opts.gibbs_opts_selector == '--gibbs':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
49 --seed $refining_opts.gibbs_opts.random_seed
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
50 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
51
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
52 $refining_opts.notrunc
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
53 $refining_opts.cyk
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
54 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
55
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
56 $cmfile_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
57 $alignment_infile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
58
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
59 </command>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
60 <inputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
61 <!-- Stockholm or SELEX
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
62 SELEX is defined in EMBOSS datatypes
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
63 -->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
64 <param name="alignment_infile" type="data" format="stockholm,selex" label="Sequence database"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
65
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
66 <conditional name="model_construction_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
67 <param name="model_construction_opts_selector" type="select" label="These options control how consensus columns are defined in an alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
68 <option value="--fast" selected="true">automatic (--fast)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
69 <option value="--hand">user defined (--hand)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
70 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
71 <when value="--fast">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
72 <param name="symfrac" type="float" value="0.5" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
73 label="Define the residue fraction threshold necessary to define a consensus (--symfrac)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
74 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
75 <when value="--hand"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
76 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
77
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
78 <param name="noss" truevalue="--noss" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
79 label="Ignore the secondary structure annotation, if any, in your multiple alignment file (--noss)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
80
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
81 <conditional name="relative_weights_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
82 <param name="relative_weights_opts_selector" type="select" label="Options controlling relative weights" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
83 <option value="--wpb" selected="true">Henikoff (--wpb)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
84 <option value="--wgsc">Gerstein/Sonnhammer/Chothia (--wgsc)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
85 <option value="--wnone">no sequence weighting (--wnone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
86 <option value="--wgiven">Sequence weight from given in input file (--wgiven)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
87 <option value="--wblosum">BLOSUM filtering algorithm (--wblosum)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
88 <!-- The former "wid" choice was removed: wid is the numeric argument of the wblosum scheme (see the "wid" param in the wblosum branch), not a weighting scheme of its own. -->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
89 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
90 <when value="--wpb"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
91 <when value="--wgsc"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
92 <when value="--wnone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
93 <when value="--wgiven"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
94 <when value="--wblosum">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
95 <param name="wid" type="float" value="0.5" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
96 label="Percent identity for clustering the alignment (--wid)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
97 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
98 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
99
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
100
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
101 <conditional name="effective_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
102 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
103 <option value="--eent" selected="true">entropy weighting strategy (--eent)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
104 <option value="--enone">Turn off the entropy weighting strategy (--enone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
105 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
106 <when value="--enone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
107 <when value="--eent">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
108 <param name="ere" type="float" value="0.59" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
109 label="Set the target mean match state relative entropy (--ere)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
110
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
111 <param name="eminseq" type="integer" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
112 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
113
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
114 <param name="ehmmre" type="float" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
115 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
116
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
117 <param name="eset" type="integer" value="" size="5"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
118 label="Set the effective sequence number for entropy weighting (--eset)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
119 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
120 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
121
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
122
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
123 <conditional name="refining_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
124 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
125 <option value="" selected="true">No refinement</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
126 <option value="--refine">refine the input alignment</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
127 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
128 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
129 <when value="--refine">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
130
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
131 <conditional name="gibbs_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
132 <param name="gibbs_opts_selector" type="select" label="refinement mode" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
133 <option value="" selected="true">expectation-maximization (EM)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
134 <option value="--gibbs">Gibbs sampling</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
135 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
136 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
137 <when value="--gibbs">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
138 <param name="random_seed" type="integer" value="0" label="Random Seed" help="" />
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
139 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
140 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
141
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
142 <param name="l" truevalue="-l" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
143 label="Turn on the local alignment algorithm" help="... which allows the alignment to span two or more subsequences if necessary"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
144
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
145 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
146 label="Turn off the truncated alignment algorithm" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
147
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
148 <param name="cyk" type="select" label="Alignment algorithm used during refinement" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
149 <option value="" selected="true">optimal accuracy algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
150 <option value="--cyk">align with the CYK algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
151 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
152
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
153 <param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
154 label="Output the refined alignment file as it is used to build the covariance model" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
155
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
156 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
157 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
158
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
159
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
160 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
161 label="Output a summary file?" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
162
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
163 </inputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
164 <outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
165
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
166 <data format="txt" name="summary_outfile" label="cmbuild summary on ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
167 <filter>is_summery_output is True</filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
168 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
169 <!--<data format="stockholm" name="annotated_source_alignment_outfile" label="Annotated alignment from ${on_string}"/>-->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
170 <data format="cm" name="cmfile_outfile" label="Covariance models from ${on_string}"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
171
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
172 <data format="stockholm" name="refined_multiple_alignment_output" label="refined alignment file of ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
173 <filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
174 ((
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
175 refining_opts['refining_opts_selector'] == "--refine" and
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
176 refining_opts['refine_output'] is True
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
177 ))
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
178 </filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
179 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
180
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
181 </outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
182 <help>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
183
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
184
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
185 **What it does**
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
186
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
187 For each multiple sequence alignment build a covariance model.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
188 The alignment file must be in Stockholm or SELEX format, and must contain consensus secondary structure annotation.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
189 cmbuild uses the consensus structure to determine the architecture of the CM.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
190
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
191 In addition to writing CM(s) to CMFILE_OUT, cmbuild also outputs a single line for each model created to stdout. Each
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
192 line has the following fields: ”aln”: the index of the alignment used to build the CM; ”idx”: the index of the CM in the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
193 CMFILE_OUT; ”name”: the name of the CM; ”nseq”: the number of sequences in the alignment used to build the CM;
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
194 ”eff nseq”: the effective number of sequences used to build the model; ”alen”: the length of the alignment used to build
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
195 the CM; ”clen”: the number of columns from the alignment defined as consensus (match) columns; ”bps”: the number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
196 of basepairs in the CM; ”bifs”: the number of bifurcations in the CM; ”rel entropy: CM”: the total relative entropy of the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
197 model divided by the number of consensus columns; ”rel entropy: HMM”: the total relative entropy of the model ignoring
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
198 secondary structure divided by the number of consensus columns. ”description”: description of the model/alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
199
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
200
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
201 Options controlling model construction
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
202 --------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
203
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
204 These options control how consensus columns are defined in an alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
205
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
206 * --fast Define consensus columns automatically as those that have a fraction >= symfrac of residues as opposed to gaps. (See below for the --symfrac option.) This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
207 * --hand Use reference coordinate annotation (#=GC RF line, in Stockholm) to determine which columns are consensus, and which are inserts. Any non-gap character indicates a consensus column. (For example, mark consensus columns with ”x”, and insert columns with ”.”.)
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
208 * --symfrac Define the residue fraction threshold necessary to define a consensus column when not using --hand. The default is 0.5. The symbol fraction in each column is calculated after taking relative sequence weighting into account. Setting this to 0.0 means that every alignment column will be assigned as consensus, which may be useful in some cases. Setting it to 1.0 means that only columns that include 0 gaps will be assigned as consensus.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
209 * --noss Ignore the secondary structure annotation, if any, in MSA-Infile and build a CM with zero basepairs. This model will be similar to a profile HMM and the cmsearch and cmscan programs will use HMM algorithms which are faster than CM ones for this model. Additionally, a zero basepair model need not be calibrated with cmcalibrate prior to running cmsearch with it. The --noss option must be used if there is no secondary structure annotation in MSA-Infile.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
210
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
211
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
212 Options controlling relative weights
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
213 ------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
214
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
215 cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
216 related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example,
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
217 two identical sequences would typically each receive half the weight that one sequence would. These options control
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
218 which algorithm gets used.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
219
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
220 * --wpb Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
221 * --wgsc Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
222 * --wnone Turn sequence weighting off; i.e. explicitly set all sequence weights to 1.0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
223 * --wgiven Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. Without this option, new sequence weights are determined by the default Henikoff position-based scheme (--wpb), ignoring any annotated weights.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
224 * --wblosum Use the BLOSUM filtering algorithm to weight the sequences, instead of the default position-based (--wpb) weighting. Cluster the sequences at a given percentage identity (see --wid); assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
225 * --wid Controls the behavior of the --wblosum weighting option by setting the percent identity for clustering the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
226
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
227
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
228 Options controlling effective sequence number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
229 ---------------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
230
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
231 After relative weights are determined, they are normalized to sum to a total effective sequence number, eff nseq. This
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
232 number may be the actual number of sequences in the alignment, but it is almost always smaller than that. The default
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
233 entropy weighting method (--eent) reduces the effective sequence number to reduce the information content (relative
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
234 entropy, or average expected score on true homologs) per consensus position. The target relative entropy is controlled
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
235 by a two-parameter function, where the two parameters are settable with --ere and --esigma.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
236
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
237 * --eent Use the entropy weighting strategy to determine the effective sequence number that gives a target mean match state relative entropy. This option is the default, and can be turned off with --enone. The default target mean match state relative entropy is 0.59 bits for models with at least 1 basepair and 0.38 bits for models with zero basepairs, but can be changed with --ere. The default of 0.59 or 0.38 bits is automatically changed if the total relative entropy of the model (summed match state relative entropy) is less than a cutoff, which is 6.0 bits by default, but can be changed with the expert, undocumented --eX option. If you really want to play with that option, consult the source code.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
238 * --enone Turn off the entropy weighting strategy. The effective sequence number is just the number of sequences in the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
239 * --ere Set the target mean match state relative entropy. By default the target relative entropy per match position is 0.59 bits for models with at least 1 basepair and 0.38 for models with zero basepairs.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
240 * --eminseq Define the minimum allowed effective sequence number.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
241 * --ehmmre Set the target HMM mean match state relative entropy. Entropy for basepairing match states is calculated using marginalized basepair emission probabilities.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
242 * --eset Set the effective sequence number for entropy weighting.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
243
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
244
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
245
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
246 Options for refining the input alignment
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
247 ----------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
248
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
249 * --refine Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations).
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
250 * -l Turn on the local alignment algorithm, which allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
251 * --gibbs Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal; instead an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
252 * --seed Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
253 * --cyk With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
254 * --notrunc With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
255
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
256
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
257 For further questions please refer to the Infernal Userguide_.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
258
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
259 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
260
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
261
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
262 How do I cite Infernal?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
263 -----------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
264
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
265 The Infernal 1.0 paper (Nawrocki et al., 2009) is the best paper to reference.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
266 If you’re writing for an enlightened (url-friendly) journal, you may want to cite the webpage
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
267 http://infernal.janelia.org/ because it is kept up-to-date. We hope to publish a paper related to
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
268 Infernal version 1.1 soon.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
269
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
270 **Galaxy Wrapper Author**::
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
271
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
272 * Bjoern Gruening, University of Freiburg
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
273
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
274 </help>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
275 </tool>