Mercurial > repos > jjohnson > cdhit
comparison cd_hit_est.xml @ 11:75fde37f69e5
Add cd-hit to protein fastas
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 27 Jun 2013 21:27:06 -0500 |
parents | f0c20796d33a |
children |
comparison
equal
deleted
inserted
replaced
10:211ca88ce047 | 11:75fde37f69e5 |
---|---|
1 <tool id="cd_hit_est" name="CD-HIT-EST" version="1.1"> | 1 <tool id="cd_hit_est" name="CD-HIT-EST" version="1.2"> |
2 <description>Cluster a nucleotide dataset into representative sequences</description> | 2 <description>Cluster a nucleotide dataset into representative sequences</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="4.6.1">cd-hit</requirement> | 4 <requirement type="package" version="4.6.1">cd-hit</requirement> |
5 </requirements> | 5 </requirements> |
| | 6 <macros> |
| | 7 <import>cdhit_macros.xml</import> |
| | 8 </macros> |
6 <command> | 9 <command> |
7 cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand | 10 cd-hit-est -i "$fasta_in" -o rep_seq -c $similarity -n $wordsize $strand |
| | 11 #include source=$common_cdhit_options# |
| | 12 #include source=$runtime_tuning# |
8 </command> | 13 </command> |
9 <inputs> | 14 <inputs> |
10 <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/> | 15 <param name="fasta_in" type="data" format="fasta" label="EST Sequences to cluster"/> |
11 <param name="similarity" type="float" value="0.9" label="similarity threshold: .75 - 1.0, default is .9"> | 16 <param name="similarity" type="float" value="0.9" label="similarity threshold: .75 - 1.0, default is .9"> |
12 <validator type="in_range" message="sequence similarity threshold should be .75 - 1.0" min=".75" max="1.0"/> | 17 <validator type="in_range" message="sequence similarity threshold should be .75 - 1.0" min=".75" max="1.0"/> |
20 4 for thresholds 0.75 ~ 0.8 | 25 4 for thresholds 0.75 ~ 0.8 |
21 </help> | 26 </help> |
22 <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/> | 27 <validator type="in_range" message="word size should be between 4 and 10" min="4" max="10"/> |
23 </param> | 28 </param> |
24 <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/> | 29 <param name="strand" type="boolean" truevalue="-r 1" falsevalue="" checked="false" label="Compare both strands"/> |
| | 30 <expand macro="common_cdhit_options" /> |
| | 31 <expand macro="runtime_tuning" /> |
25 </inputs> | 32 </inputs> |
26 <outputs> | 33 <outputs> |
27 <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/> | 34 <data format="txt" name="clusters_out" label="${tool.name} on ${on_string}: clusters" from_work_dir="rep_seq.clstr"/> |
28 <data format="fasta" name="fasta_out" label="${tool.name} on ${on_string}: representatives.fasta" from_work_dir="rep_seq"/> | 35 <data format="fasta" name="fasta_out" label="${tool.name} on ${on_string}: representatives.fasta" from_work_dir="rep_seq"/> |
29 </outputs> | 36 </outputs> |
30 <tests> | 37 <tests> |
31 <test> | 38 <test> |
| | 39 <!-- Expect 3 clusters: 0,1,2 --> |
32 <param name="fasta_in" value="cd_hit_est_in.fa" /> | 40 <param name="fasta_in" value="cd_hit_est_in.fa" /> |
33 <param name="similarity" value="0.9"/> | 41 <param name="similarity" value="0.9"/> |
34 <param name="wordsize" value="8"/> | 42 <param name="wordsize" value="8"/> |
| | 43 <param name="strand" value="true"/> |
| | 44 <!-- conditionals in macros --> |
| | 45 <param name="settings" value="no"/> |
| | 46 <param name="tuning" value="default"/> |
35 <output name="clusters_out"> | 47 <output name="clusters_out"> |
36 <assert_contents> | 48 <assert_contents> |
37 <has_text text=">Cluster" /> | 49 <has_text text=">Cluster 0" /> |
| | 50 <!-- There should not be a Cluster 3 --> |
| | 51 <not_has_text text="Cluster 3" /> |
| | 52 <has_text_matching expression="F12Fcsw_481739" /> |
| | 53 </assert_contents> |
| | 54 </output> |
| | 55 <output name="fasta_out"> |
| | 56 <assert_contents> |
| | 57 <has_text_matching expression="^>[MF]\d\dFcsw_\d*" /> |
| | 58 </assert_contents> |
| | 59 </output> |
| | 60 </test> |
| | 61 <test> |
| | 62 <!-- tighter constraints should yield more clusters --> |
| | 63 <param name="fasta_in" value="cd_hit_est_in.fa" /> |
| | 64 <param name="similarity" value="0.95"/> |
| | 65 <param name="wordsize" value="9"/> |
| | 66 <param name="strand" value="true"/> |
| | 67 <!-- conditionals in macros --> |
| | 68 <param name="settings" value="no"/> |
| | 69 <param name="tuning" value="default"/> |
| | 70 <output name="clusters_out"> |
| | 71 <assert_contents> |
| | 72 <has_text text=">Cluster 4" /> |
38 <has_text_matching expression=">F12Fcsw_481739" /> | 73 <has_text_matching expression=">F12Fcsw_481739" /> |
39 </assert_contents> | 74 </assert_contents> |
40 </output> | 75 </output> |
41 <output name="fasta_out"> | 76 <output name="fasta_out"> |
42 <assert_contents> | 77 <assert_contents> |