annotate cd_hit_est.xml @ 1:12447208b8cb

Add tool_dependencies
author Jim Johnson <jj@umn.edu>
date Thu, 06 Sep 2012 21:48:31 -0500
parents 13900a1ad862
children c99f307f2bdb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="cd_hit_est" name="CD-HIT-EST" version="1.0">
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
2 <description>Cluster a nucleotide dataset into representative sequences</description>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
1
12447208b8cb Add tool_dependencies
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
4 <requirement type="package" version="4.6.1">cd-hit-est</requirement>
0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
6 <command>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
7 cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
8 </command>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
9 <inputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
10 <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
11 <param name="similarity" type="float" value="0.9" label="similarity threshold: .75 - 1.0, default is .9">
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
12 <validator type="in_range" message="sequence similarity threshold should be .75 - 1.0" min=".75" max="1.0"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
13 </param>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
14 <param name="wordsize" type="integer" value="8" label="word size">
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
15 <help> Suggested word size:
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
16 8,9,10 for thresholds 0.90 ~ 1.0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
17 7 for thresholds 0.88 ~ 0.9
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
18 6 for thresholds 0.85 ~ 0.88
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
19 5 for thresholds 0.80 ~ 0.85
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
20 4 for thresholds 0.75 ~ 0.8
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
21 </help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
22 <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
23 </param>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
24 <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
25 </inputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
26 <outputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
27 <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
28 <data format="fasta" name="fasta_out" label="${tool.name} on ${on_string}: representatives.fasta" from_work_dir="rep_seq"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
29 </outputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
30 <tests>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
31 </tests>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
32 <help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
33 **CD-HIT-EST**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
34
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
35 CD-HIT_ stands for Cluster Database at High Identity with Tolerance. The program (cd-hit) takes a fasta format sequence database as input and produces a set of 'non-redundant' (nr) representative sequences as output. In addition, cd-hit outputs a cluster file, documenting the sequence 'groupies' for each nr sequence representative. The idea is to reduce the overall size of the database without removing any sequence information by only removing 'redundant' (or highly similar) sequences. This is why the resulting database is called non-redundant (nr). Essentially, cd-hit produces a set of closely related protein families from a given fasta sequence database; cd-hit-est applies the same clustering to nucleotide (EST) sequences.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
36
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
37 .. _CD-HIT: http://www.bioinformatics.org/cd-hit/
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
38
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
39 ------
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
40
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
41 **Inputs**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
42
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
43 cd-hit-est requires a fasta dataset as input.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
44
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
45 ------
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
46
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
47 **Outputs**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
48
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
49 A fasta dataset containing the representative sequences.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
50
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
51 A text file listing the mapping of sequences to the representative sequences::
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
52
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
53 >Cluster 0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
54 0 2799aa, >PF04998.6|RPOC2_CHLRE/275-3073... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
55 >Cluster 1
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
56 0 2214aa, >PF06317.1|Q6Y625_9VIRU/1-2214... at 80%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
57 1 2215aa, >PF06317.1|O09705_9VIRU/1-2215... at 84%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
58 2 2217aa, >PF06317.1|Q6Y630_9VIRU/1-2217... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
59 3 2216aa, >PF06317.1|Q6GWS6_9VIRU/1-2216... at 84%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
60 4 527aa, >PF06317.1|Q67E14_9VIRU/6-532... at 63%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
61 >Cluster 2
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
62 0 2202aa, >PF06317.1|Q6UY61_9VIRU/8-2209... at 60%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
63 1 2208aa, >PF06317.1|Q6IVU4_JUNIN/1-2208... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
64 2 2207aa, >PF06317.1|Q6IVU0_MACHU/1-2207... at 73%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
65 3 2208aa, >PF06317.1|RRPO_TACV/1-2208... at 69%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
66
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
67
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
68 </help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
69 </tool>