annotate cd_hit_est.xml @ 6:f0c20796d33a

Update tool_dependencies, reinstate test
author Jim Johnson <jj@umn.edu>
date Tue, 11 Sep 2012 08:55:27 -0500
parents 22b6a4412ea6
children 75fde37f69e5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
4872c1cf2bff version 1.1
Jim Johnson <jj@umn.edu>
parents: 3
diff changeset
1 <tool id="cd_hit_est" name="CD-HIT-EST" version="1.1">
0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
2 <description>Cluster a nucleotide dataset into representative sequences</description>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
2
c99f307f2bdb correct requirement - cd-hit
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
4 <requirement type="package" version="4.6.1">cd-hit</requirement>
0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
6 <command>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
7 cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
8 </command>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
9 <inputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
10 <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
11 <param name="similarity" type="float" value="0.9" label="similarity threshold: .75 - 1.0, default is .9">
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
12 <validator type="in_range" message="sequence similarity threshold should be .75 - 1.0" min=".75" max="1.0"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
13 </param>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
14 <param name="wordsize" type="integer" value="8" label="word size">
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
15 <help> Suggested word size:
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
16 8,9,10 for thresholds 0.90 ~ 1.0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
17 7 for thresholds 0.88 ~ 0.9
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
18 6 for thresholds 0.85 ~ 0.88
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
19 5 for thresholds 0.80 ~ 0.85
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
20 4 for thresholds 0.75 ~ 0.8
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
21 </help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
22 <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
23 </param>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
24 <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
25 </inputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
26 <outputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
27 <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
28 <data format="fasta" name="fasta_out" label="${tool.name} on ${on_string}: representatives.fasta" from_work_dir="rep_seq"/>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
29 </outputs>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
30 <tests>
3
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
31 <test>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
32 <param name="fasta_in" value="cd_hit_est_in.fa" />
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
33 <param name="similarity" value="0.9"/>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
34 <param name="wordsize" value="8"/>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
35 <output name="clusters_out">
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
36 <assert_contents>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
37 <has_text text=">Cluster" />
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
38 <has_text_matching expression=">F12Fcsw_481739" />
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
39 </assert_contents>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
40 </output>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
41 <output name="fasta_out">
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
42 <assert_contents>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
43 <has_text_matching expression="^>[MF]\d\dFcsw_\d*" />
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
44 </assert_contents>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
45 </output>
22a3fc9a14d7 Add functional test
Jim Johnson <jj@umn.edu>
parents: 2
diff changeset
46 </test>
0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
47 </tests>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
48 <help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
49 **CD-HIT-EST**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
50
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
51 CD-HIT_ stands for Cluster Database at High Identity with Tolerance. The program (cd-hit) takes a fasta format sequence database as input and produces a set of 'non-redundant' (nr) representative sequences as output. In addition, cd-hit outputs a cluster file, documenting the sequence 'groupies' for each nr sequence representative. The idea is to reduce the overall size of the database without removing any sequence information by only removing 'redundant' (or highly similar) sequences. This is why the resulting database is called non-redundant (nr). Essentially, cd-hit produces a set of closely related protein families from a given fasta sequence database.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
52
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
53 .. _CD-HIT: http://www.bioinformatics.org/cd-hit/
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
54
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
55 ------
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
56
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
57 **Inputs**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
58
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
59 cd-hit-est requires a fasta dataset as input.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
60
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
61 ------
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
62
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
63 **Outputs**
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
64
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
65 A fasta dataset containing representative sequences.
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
66
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
67 A text file listing the mapping of sequences to the representative sequences::
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
68
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
69 >Cluster 0
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
70 0 2799aa, >PF04998.6|RPOC2_CHLRE/275-3073... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
71 >Cluster 1
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
72 0 2214aa, >PF06317.1|Q6Y625_9VIRU/1-2214... at 80%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
73 1 2215aa, >PF06317.1|O09705_9VIRU/1-2215... at 84%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
74 2 2217aa, >PF06317.1|Q6Y630_9VIRU/1-2217... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
75 3 2216aa, >PF06317.1|Q6GWS6_9VIRU/1-2216... at 84%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
76 4 527aa, >PF06317.1|Q67E14_9VIRU/6-532... at 63%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
77 >Cluster 2
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
78 0 2202aa, >PF06317.1|Q6UY61_9VIRU/8-2209... at 60%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
79 1 2208aa, >PF06317.1|Q6IVU4_JUNIN/1-2208... *
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
80 2 2207aa, >PF06317.1|Q6IVU0_MACHU/1-2207... at 73%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
81 3 2208aa, >PF06317.1|RRPO_TACV/1-2208... at 69%
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
82
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
83
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
84 </help>
13900a1ad862 Uploaded
jjohnson
parents:
diff changeset
85 </tool>