comparison MutCount.xml @ 5:0ba551449008 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 273a9af69b672b2580cd5dec4c0e67a4a96fb0fe
author abims-sbr
date Tue, 27 Feb 2018 08:48:34 -0500
parents 5766f80370e7
children fe74cf0d4e7a
comparison
equal deleted inserted replaced
4:5766f80370e7 5:0ba551449008
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 2
3 <tool name="MutCount" id="mutcount" version="2.0"> 3 <tool name="MutCount" id="mutcount" version="2.1">
4 <description> 4 <description>
5 This tool proceeds to count codons, amino acids on each species of a set of species, and then proceeds to permutation tests. 5 This tool proceeds to count codons, amino acids on each species of a set of species, and then proceeds to permutation tests.
6 </description> 6 </description>
7 7
8 <macros> 8 <macros>
9 <import>macros.xml</import> 9 <import>macros.xml</import>
10 </macros> 10 </macros>
11 11
12 <requirements> 12 <requirements>
13 <expand macro="python_required" /> 13 <expand macro="python_required" />
14 <requirement type="package" version="0.20.0">pandas</requirement>
15 <requirement type="package" version="1.12.0">numpy</requirement>
14 </requirements> 16 </requirements>
15 17
16 <command> 18 <command>
17 <![CDATA[ 19 <![CDATA[
18 20
19 ln -s $__tool_directory__/scripts/functions.py . && 21 ln -s $__tool_directory__/scripts/functions.py . &&
20 22
21 #if str($method.method_run) == "concat" : 23 #if str($method.method_run) == "concat" :
22 python '$__tool_directory__/scripts/S01a_mutcount_pairs.py' $method.num_sampled $method.num_iter $method.list_species 24 python '$__tool_directory__/scripts/S01a_codons_counting.py' ${method.concat_nuc} '$method.list_species' '$method.list_species_boot' $method.num_iter $method.num_sampled > ${log}
23 &&
24 python '$__tool_directory__/scripts/S02a_codon_counting.py' ${method.concat_nuc}
25 #end if 25 #end if
26 26
27 #if str($method.method_run) == "separated" : 27 #if str($method.method_run) == "separated" :
28 #set $infiles = "" 28 #set $infiles = ""
29 #for $input in $method.sep_file 29 #for $input in $method.sep_file
40 cp '$__tool_directory__/scripts/amino_acid_properties.csv' . 40 cp '$__tool_directory__/scripts/amino_acid_properties.csv' .
41 && 41 &&
42 python '$__tool_directory__/scripts/S01b_study_seq_composition_aa.py' '$infiles' ${method.concat_phy} 42 python '$__tool_directory__/scripts/S01b_study_seq_composition_aa.py' '$infiles' ${method.concat_phy}
43 #end if 43 #end if
44 #end if 44 #end if
45
45 ]]> 46 ]]>
46 </command> 47 </command>
47 48
48 <inputs> 49 <inputs>
49 <conditional name="method"> 50 <conditional name="method">
52 <option value="separated">Set of separated genes (from ORF_Search output "output zip containing files with CDS without indel")</option> 53 <option value="separated">Set of separated genes (from ORF_Search output "output zip containing files with CDS without indel")</option>
53 </param> 54 </param>
54 55
55 <when value="concat"> 56 <when value="concat">
56 <param name="concat_nuc" type="data" format="fasta" label="Choose your fasta file in nucleic format" help="It must contain the concatenated file in NUCLEIC format from Phylogeny tool" /> 57 <param name="concat_nuc" type="data" format="fasta" label="Choose your fasta file in nucleic format" help="It must contain the concatenated file in NUCLEIC format from Phylogeny tool" />
57 <param name="num_sampled" type="integer" value="100" min="0" label="Number of iterations"/> 58 <param name="list_species" type="text" size="100" label="List of species for countings" help="List the species separated with a comma (for e.g Ap,As,Ct,Gt,Yu)" />
58 <param name="num_iter" type="integer" value="100" min="0" label="Number of sampled codons"/> 59 <param name="list_species_boot" type="text" size="100" label="List of species used for resampling" help="List the species separated with a comma (for e.g Ap,As,Ct,Gt,Yu)" />
59 <param name="list_species" type="text" size="100" label="List of species" help="List the species separated with a comma (for e.g Ap,As,Ct,Gt,Yu)" /> 60 <param name="num_iter" type="integer" value="1000" min="0" label="Number of sampled codons" help="Sets the length (in codons) of the resampled sequences"/>
61 <param name="num_sampled" type="integer" value="1000" min="0" label="Number of iterations" help="Sets the number of resampled sequences"/>
60 </when> 62 </when>
61 63
62 <when value="separated"> 64 <when value="separated">
63 <param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? "> 65 <param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? ">
64 <option value="nucleic">Nucleic format</option> 66 <option value="nucleic">Nucleic format</option>
70 </conditional> 72 </conditional>
71 </inputs> 73 </inputs>
72 74
73 <outputs> 75 <outputs>
74 <!-- output concat --> 76 <!-- output concat -->
75 <!-- 77 <data format="txt" name="log" label="MutCount_concat_log.output" />
76 <data format="txt" name="output1" label="counts.txt" from_work_dir="counts.txt" > 78 <data format="csv" name="codons_freqs" label="codons_freqs.csv" from_work_dir="codons_freqs.csv" >
77 <filter>(method['method_run']=='concat')</filter> 79 <filter>(method['method_run']=='concat')</filter>
78 </data> 80 </data>
79 <data format="txt" name="output2" label="biases.txt" from_work_dir="biases.txt" > 81 <data format="csv" name="aa_freqs" label="aa_freqs.csv" from_work_dir="aa_freqs.csv" >
80 <filter>(method['method_run']=='concat')</filter> 82 <filter>(method['method_run']=='concat')</filter>
81 </data> 83 </data>
82 --> 84 <data format="csv" name="aatypes_freqs" label="aatypes_freqs.csv" from_work_dir="aatypes_freqs.csv" >
83 <data format="csv" name="codons_counts" label="codons_counts.csv" from_work_dir="codons_counts.csv" > 85 <filter>(method['method_run']=='concat')</filter>
84 <filter>(method['method_run']=='concat')</filter> 86 </data>
85 </data> 87 <data format="csv" name="gc_and_others_freqs" label="gc_and_others_freqs.csv" from_work_dir="gc_and_others_freqs.csv" >
86 <data format="csv" name="aa_counts" label="aa_counts.csv" from_work_dir="aa_counts.csv" > 88 <filter>(method['method_run']=='concat')</filter>
87 <filter>(method['method_run']=='concat')</filter> 89 </data>
88 </data> 90 <data format="csv" name="codons_transitions_freqs" label="codons_transitions_freqs" from_work_dir="codons_transitions_freqs.csv" >
89 <data format="csv" name="aatypes_counts" label="aatypes_counts.csv" from_work_dir="aatypes_counts.csv" > 91 <filter>(method['method_run']=='concat')</filter>
90 <filter>(method['method_run']=='concat')</filter> 92 </data>
91 </data> 93 <data format="csv" name="aa_transitions_freqs" label="aa_transitions_freqs.csv" from_work_dir="aa_transitions_freqs.csv" >
92 <data format="csv" name="gc_counts" label="gc_counts.csv" from_work_dir="gc_counts.csv" > 94 <filter>(method['method_run']=='concat')</filter>
93 <filter>(method['method_run']=='concat')</filter> 95 </data>
94 </data> 96 <data format="csv" name="aatypes_transitions_freqs" label="aatypes_transitions.csv" from_work_dir="aatypes_transitions_freqs.csv" >
95 <data format="csv" name="aa_transitions" label="aa_transitions.csv" from_work_dir="aa_transitions.csv" >
96 <filter>(method['method_run']=='concat')</filter>
97 </data>
98 <data format="csv" name="aatypes_transitions" label="aatypes_transitions.csv" from_work_dir="aatypes_transitions.csv" >
99 <filter>(method['method_run']=='concat')</filter> 97 <filter>(method['method_run']=='concat')</filter>
100 </data> 98 </data>
101 99
102 <!-- outputs separated - nucleic --> 100 <!-- outputs separated - nucleic -->
103 <data format="csv" name="nuc_comp" label="nuc_compositions.csv" from_work_dir="OUT/nuc_compositions.csv" > 101 <data format="csv" name="nuc_comp" label="nuc_compositions.csv" from_work_dir="OUT/nuc_compositions.csv" >
154 <filter>(method['method_run']=='separated' and method['format_run']== 'proteic')</filter> 152 <filter>(method['method_run']=='separated' and method['format_run']== 'proteic')</filter>
155 </data> 153 </data>
156 154
157 </outputs> 155 </outputs>
158 156
159 <tests> 157 <tests>
160 <test> 158 <test>
161 <conditional name="method" > 159 <conditional name="method" >
162 <param name="method_run" value="concat" /> 160 <param name="method_run" value="concat" />
163 <param name="concat_nuc" ftype="fasta" value="test_07_output_phylogeny_concatenation.fasta" /> 161 <param name="concat_nuc" ftype="fasta" value="concatenation.fasta" />
164 <param name="num_sampled" value="100" /> 162 <param name="list_species" ftype="text" value="Ps,Pp,Pu,Ac,Ap,Pf,Pg,Ph,Pi" />
165 <param name="num_iter" value="100" /> 163 <param name="list_species_boot" ftype="text" value="Ps,Pp,Pu,Pf" />
166 <param name="list_species" ftype="text" value="Ac,Am,Ap,Pu" /> 164 <param name="num_iter" value="200" />
167 </conditional> 165 <param name="num_sampled" value="200" />
168 <output name="codons_counts" value="OUT_concat/codons_counts.csv" lines_diff="8"/> 166 </conditional>
169 <output name="aa_counts" value="OUT_concat/aa_counts.csv" lines_diff="8"/> 167 <output name="log" value="OUT_concat/MutCount_concat_log.output" lines_diff="2"/>
170 <output name="aatypes_counts" value="OUT_concat/aatypes_counts.csv" lines_diff="8"/> 168 <output name="codons_freqs" value="OUT_concat/codons_freqs.csv" lines_diff="18"/>
171 <output name="gc_counts" value="OUT_concat/gc_counts.csv"/> 169 <output name="aa_freqs" value="OUT_concat/aa_freqs.csv" lines_diff="18"/>
172 <output name="aa_transitions" value="OUT_concat/aa_transitions.csv" lines_diff="14"/> 170 <output name="aatypes_freqs" value="OUT_concat/aatypes_freqs.csv" lines_diff="18"/>
173 <output name="aatypes_transitions" value="OUT_concat/aatypes_transitions.csv" lines_diff="14"/> 171 <output name="gc_and_others_freqs" value="OUT_concat/gc_and_others_freqs.csv"/>
172 <output name="codons_transitions_freqs" value="OUT_concat/codons_transitions_freqs.csv" lines_diff="72"/>
173 <output name="aa_transitions_freqs" value="OUT_concat/aa_transitions_freqs.csv" lines_diff="72"/>
174 <output name="aatypes_transitions_freqs" value="OUT_concat/aatypes_transitions_freqs.csv" lines_diff="72"/>
174 </test> 175 </test>
175 176
176 <test> 177 <test>
177 <conditional name="method" > 178 <conditional name="method" >
178 <param name="method_run" value="separated" /> 179 <param name="method_run" value="separated" />
237 @HELP_AUTHORS@ 238 @HELP_AUTHORS@
238 239
239 <![CDATA[ 240 <![CDATA[
240 241
241 **Last Version** : Victor Mataigne and Gildas Le Corguillé 242 **Last Version** : Victor Mataigne and Gildas Le Corguillé
243
242 -------- 244 --------
243 245
244 **Description** 246 **Description**
245 247
246 This script counts the number of codons, amino acids, and types of amino acids in sequences, as well as the mutation bias from one item to another between 2 sequences. Counting is then compared to empirical p-values, obtained from bootstrapped sequences obtained from a subset of sequences. 248 This script counts the number of codons, amino acids, and types of amino acids in sequences, as well as the mutation bias from one item to another between 2 sequences. Counting is then compared to empirical p-values, obtained from bootstrapped sequences obtained from a subset of sequences.
267 269
268 **Parameters** 270 **Parameters**
269 271
270 There are parameters only for the "Concatenated" method : 272 There are parameters only for the "Concatenated" method :
271 273
274 - The list of species for **countings**, separated by commas and without spaces (e.g : sp1,sp2,sp3,sp4). You can run the tool on a subgroup of species, not only on the total number of species present in the previous tools.
275
276 - The list of species for **resampling**, separated by commas and without spaces (e.g : sp1,sp2,sp3,sp4). You can run the tool on a subgroup of species, not only on the total number of species present in the previous tools.
277
272 - The number of iterations : the number of alignments that will be generated (effect on the resolution of the gaussian distribution). Shouldn't be lower than 1000 to have a relatively smooth gaussian distribution. 278 - The number of iterations : the number of alignments that will be generated (effect on the resolution of the gaussian distribution). Shouldn't be lower than 1000 to have a relatively smooth gaussian distribution.
273 279
274 - The number of sampled codons : the number of pairs of codons in each generated alignment (effect on the robustness of the countings performed on this alignment). Shouldn't be lower than 1000 to detect codons with relatively low occurrence (<1%). 280 - The number of sampled codons : the number of pairs of codons in each generated alignment (effect on the robustness of the countings performed on this alignment). Shouldn't be lower than 1000 to detect codons with relatively low occurrence (<1%).
275
276 - The list of species, separated by commas and without space (e.g : sp1,sp2,sp3,sp4). You can run the tool on subgroup of species, not only on the total number of species present in the previous tools. You can also write 'all' to include every species.
277 281
278 -------- 282 --------
279 283
280 **Outputs** 284 **Outputs**
281 285
293 --------- 297 ---------
294 298
295 Changelog 299 Changelog
296 --------- 300 ---------
297 301
298 **Version 2.1 - 10/01/2017** 302 **Version 2.1 - 26/02/2018**
299 303 - Fully re-written the concat method : fixed mistakes + cleaner code
300 - Split the output of the concatenated method into several csv files. 304 - Split the output of the concatenated method into several csv files.
301 - Bug corrected in output files of separated method. 305 - Bug corrected in output files of separated method.
302 306
303 **Version 2.0 - 12/07/2017** 307 **Version 2.0 - 12/07/2017**
304 308