42
|
<tool id="plant_tribes_phylogenomics_analysis" name="Create multiple sequence alignments" version="0.3">
    <description>and inferred maximum likelihood phylogenies</description>
    <!-- Package dependency; its version matches the wrapper version declared on the tool element. -->
    <requirements>
        <requirement type="package" version="0.3">plant_tribes_phylogenomics_analysis</requirement>
    </requirements>
|
|
<stdio>
    <!-- Anything other than zero is an error -->
    <exit_code range="1:" />
    <exit_code range=":-1" />
    <!-- In case the return code has not been set properly, also scan the tool output for error markers -->
    <regex match="Error:" />
    <regex match="Exception:" />
</stdio>
|
|
<command>
<![CDATA[
    ## Cheetah template that renders the shell command line for PhylogenomicsAnalysis.
    ## Order of work: resolve inputs and destination directories, run the pipeline,
    ## then write a listing into each primary output and move the generated files
    ## into that output's extra_files_path.
    #import os
    #import sys

    #set input_format = $input_format_cond.input_format
    ## Directory names below are where the pipeline output is read from.
    #set pipeline_output_dir = 'phylogenomicsAnalysis_dir'
    ## The data table path is split into its parent directory and the scaffold name.
    #set scaffold_path = $scaffold.fields.path
    #set scaffold_dir = $os.path.split($scaffold_path)[0]
    #set scaffold_selection = $os.path.split($scaffold_path)[1]
    #set src_aln_dir = $os.path.join($pipeline_output_dir, 'orthogroups_aln')
    #set src_fasta_dir = $os.path.join($pipeline_output_dir, 'orthogroups_fasta')
    #set src_tree_dir = $os.path.join($pipeline_output_dir, 'orthogroups_tree')

    ## NOTE(review): multiple_codon_alignments and use_corresponding_coding_sequences are
    ## computed here but never passed to PhylogenomicsAnalysis further down, and the
    ## sequence_type input is never read at all - confirm whether flags are missing.
    #if str($input_format) == 'ptcgf':
        #set dest_fasta_dir = $output_fasta_ptcgf.extra_files_path
        #set multiple_codon_alignments = 'no'
        #set use_corresponding_coding_sequences = 'no'
    #else:
        #set dest_fasta_dir = $output_fasta_ptcgfcs.extra_files_path
        #set multiple_codon_alignments_cond = $input_format_cond.multiple_codon_alignments_cond
        #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments
        #if str($multiple_codon_alignments) == 'yes':
            #set use_corresponding_coding_sequences_cond = $multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond
            #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences
        #else:
            #set use_corresponding_coding_sequences = 'no'
        #end if
    #end if
    mkdir -p '$dest_fasta_dir' &&

    ## The alignment output only exists when advanced alignment options are set.
    #if str($options_type.options_type_selector) == 'advanced':
        #set multiple_sequence_alignments_cond = $options_type.multiple_sequence_alignments_cond
        #set multiple_sequence_alignments = $multiple_sequence_alignments_cond.multiple_sequence_alignments
        #if str($multiple_sequence_alignments) == 'yes':
            #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond
            #set multiple_sequence_alignments_option = $multiple_sequence_alignments_option_cond.multiple_sequence_alignments_option
            #set generate_output_aln = True
            #set dest_aln_dir = $output_aln.extra_files_path
            mkdir -p '$dest_aln_dir' &&
        #else:
            #set generate_output_aln = False
        #end if
    #else:
        #set generate_output_aln = False
    #end if

    ## The tree output only exists when advanced tree options are set.
    #if str($options_type.options_type_selector) == 'advanced' and str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes':
        #set generate_output_tree = True
        #set dest_tree_dir = $output_tree.extra_files_path
        mkdir -p '$dest_tree_dir' &&
    #else:
        #set generate_output_tree = False
    #end if

    PhylogenomicsAnalysis
    --num_threads \${GALAXY_SLOTS:-4}
    #if str($input_format) == 'ptcgf':
        --orthogroup_faa '$input_format_cond.input_ptcgf.extra_files_path'
    #else:
        --orthogroup_faa '$input_format_cond.input_ptcgfcs.extra_files_path'
    #end if
    --scaffold_dir '$scaffold_dir'
    --scaffold '$scaffold_selection'
    --method $method

    #if str($options_type.options_type_selector) == 'advanced':
        #if str($multiple_sequence_alignments) == 'yes':
            #if str($multiple_sequence_alignments_option) == 'create_alignments':
                --create_alignments
            #else if str($multiple_sequence_alignments_option) == 'add_alignments':
                --add_alignments
            #else if str($multiple_sequence_alignments_option) == 'pasta_alignments':
                --pasta_script_path '$__tool_directory__/run_pasta.py'
                --pasta_alignments
                --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit
            #end if
        #end if

        #if str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes':
            #set tree_inference_cond = $options_type.phylogenetic_trees_cond.tree_inference_cond
            #set tree_inference_method = $tree_inference_cond.tree_inference
            --tree_inference $tree_inference_method
            #if str($tree_inference_method) == 'raxml':
                #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond
                #set rooting_order_file = $rooting_order_file_cond.rooting_order_file
                #if str($rooting_order_file) == 'yes':
                    --rooting_order '$rooting_order_file_cond.rooting_order'
                #end if
                --bootstrap_replicates $tree_inference_cond.bootstrap_replicates
            #end if
            --max_orthogroup_size $options_type.phylogenetic_trees_cond.max_orthogroup_size
            --min_orthogroup_size $options_type.phylogenetic_trees_cond.min_orthogroup_size
        #end if

        #if str($options_type.msa_quality_control_options_cond.msa_quality_control_options) == 'yes':
            #set msa_quality_control_options_cond = $options_type.msa_quality_control_options_cond
            #set remove_gappy_sequences_cond = $msa_quality_control_options_cond.remove_gappy_sequences_cond
            ## Test the select parameter inside the conditional. Comparing the conditional
            ## container itself to 'yes' never matches, which silently dropped every
            ## trimming flag below.
            #if str($remove_gappy_sequences_cond.remove_gappy_sequences) == 'yes':
                #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond
                --remove_sequences $remove_gappy_sequences_cond.remove_sequences
                #if str($trim_type_cond.trim_type) == 'automated_trimming':
                    --automated_trimming
                #else:
                    --gap_trimming $trim_type_cond.gap_trimming
                #end if
            #end if
        #end if
    #end if

    ## Each primary output receives a file count followed by a directory listing.
    #if str($input_format) == 'ptcgf':
        && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > '$output_fasta_ptcgf'
        && ls -al $src_fasta_dir | grep f >> '$output_fasta_ptcgf'
    #else:
        && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > '$output_fasta_ptcgfcs'
        && ls -al $src_fasta_dir | grep f >> '$output_fasta_ptcgfcs'
    #end if

    ## NOTE(review): in the shell '||' applies to the whole preceding '&&' chain, so each
    ## '|| true' below also hides a non-zero exit from PhylogenomicsAnalysis and from
    ## grep, not only from mv. Left unchanged because later steps rely on it - confirm
    ## this is intended.
    && mv $src_fasta_dir/* '$dest_fasta_dir' || true

    #if $generate_output_aln:
        && echo "Multiple sequence alignments using method '$multiple_sequence_alignments_option' (note that some files may be empty): `ls $src_aln_dir | grep f | wc -l` files" > '$output_aln'
        && ls -al $src_aln_dir | grep f >> '$output_aln'
        && mv $src_aln_dir/* '$dest_aln_dir' || true
    #end if

    #if $generate_output_tree:
        && echo "Phylogenetic trees (note that some files may be empty): `ls $src_tree_dir | grep f | wc -l` files" > '$output_tree'
        && ls -al $src_tree_dir | grep f >> '$output_tree'
        && mv $src_tree_dir/* '$dest_tree_dir' || true
    #end if
]]>
</command>
|
|
<inputs>
    <!-- Input dataset type. The selected value also decides which of the two fasta outputs is produced. -->
    <conditional name="input_format_cond">
        <param name="input_format" type="select" label="Select type of data to sub sample">
            <option value="ptcgf">Gene family clusters</option>
            <option value="ptcgfcs">Gene family clusters and corresponding coding sequences</option>
        </param>
        <when value="ptcgf">
            <param name="input_ptcgf" format="ptcgf" type="data" label="Gene family clusters"/>
        </when>
        <when value="ptcgfcs">
            <param name="input_ptcgfcs" format="ptcgfcs" type="data" label="Gene family clusters with corresponding coding sequences"/>
            <conditional name="multiple_codon_alignments_cond">
                <param name="multiple_codon_alignments" type="select" label="Construct orthogroup multiple codon alignments?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference - (dna)">
                        <option value="protein" selected="true">Amino acid based</option>
                        <option value="dna">Nucleotide based</option>
                    </param>
                    <conditional name="use_corresponding_coding_sequences_cond">
                        <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences?">
                            <option value="no" selected="true">No</option>
                            <option value="yes">Yes</option>
                        </param>
                        <when value="no" />
                        <when value="yes" />
                    </conditional>
                </when>
            </conditional>
        </when>
    </conditional>
    <!-- Scaffold choices come from the plant_tribes_scaffolds data table; the command reads the entry's path field. -->
    <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
        <options from_data_table="plant_tribes_scaffolds" />
        <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
    </param>
    <param name="method" type="select" label="Protein clustering method">
        <option value="gfam" selected="true">GFam</option>
        <option value="orthofinder">OrthoFinder</option>
        <option value="orthomcl">OrthoMCL</option>
    </param>
    <!-- Everything below is optional; "basic" passes none of these settings to the command. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options Configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <!-- Choosing "yes" here also enables the output_aln dataset. -->
            <conditional name="multiple_sequence_alignments_cond">
                <param name="multiple_sequence_alignments" type="select" label="Set multiple sequence alignments options?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="multiple_sequence_alignments_option_cond">
                        <param name="multiple_sequence_alignments_option" label="Select method for multiple sequence alignments" type="select" force_select="True">
                            <option value="create_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (MAFFT algorithm)</option>
                            <option value="add_alignments">Add unaligned orthogroup proteins to scaffold backbone multiple sequence alignments (MAFFT algorithm)</option>
                            <option value="pasta_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (PASTA algorithm)</option>
                        </param>
                        <when value="create_alignments"/>
                        <when value="add_alignments"/>
                        <when value="pasta_alignments">
                            <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iteration that the PASTA algorithm will run"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
            <!-- Choosing "yes" here also enables the output_tree dataset. -->
            <conditional name="phylogenetic_trees_cond">
                <param name="phylogenetic_trees" type="select" label="Set phylogenetic trees options?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="tree_inference_cond">
                        <param name="tree_inference" type="select" label="Phylogenetic trees inference method">
                            <option value="raxml" selected="true">RAxML</option>
                            <option value="fasttree">FastTree</option>
                        </param>
                        <when value="raxml">
                            <conditional name="rooting_order_file_cond">
                                <param name="rooting_order_file" type="select" label="Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?">
                                    <option value="no" selected="true">No</option>
                                    <option value="yes">Yes</option>
                                </param>
                                <when value="no"/>
                                <when value="yes">
                                    <param name="rooting_order" format="txt" type="data" label="History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees"/>
                                </when>
                            </conditional>
                            <param name="bootstrap_replicates" type="integer" value="100" min="0" label="Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree"/>
                        </when>
                        <when value="fasttree"/>
                    </conditional>
                    <param name="max_orthogroup_size" type="integer" value="100" min="0" label="Maximum number of sequences in orthogroup alignments"/>
                    <param name="min_orthogroup_size" type="integer" value="4" min="0" label="Minimum number of sequences in orthogroup alignments"/>
                </when>
            </conditional>
            <conditional name="msa_quality_control_options_cond">
                <param name="msa_quality_control_options" type="select" label="Set MSA quality control options?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="remove_gappy_sequences_cond">
                        <param name="remove_gappy_sequences" type="select" label="Remove gappy sequences in alignments?">
                            <option value="no" selected="true">No</option>
                            <option value="yes">Yes</option>
                        </param>
                        <when value="no"/>
                        <when value="yes">
                            <param name="remove_sequences" type="float" value="0.5" min="0" max="1" label="Remove sequences with gaps of" help="0.5 removes sequences with 50% gaps"/>
                            <conditional name="trim_type_cond">
                                <param name="trim_type" type="select" label="Select process used for gap trimming">
                                    <option value="gap_trimming" selected="true">Nucleotide based</option>
                                    <option value="automated_trimming">Trim alignments using trimAl's ML heuristic trimming approach</option>
                                </param>
                                <when value="automated_trimming"/>
                                <when value="gap_trimming">
                                    <param name="gap_trimming" type="float" value="0.1" min="0" max="1.0" label="Remove sites in alignments with gaps of" help="0.1 removes sites with 90% gaps"/>
                                </when>
                            </conditional>
                        </when>
                    </conditional>
                </when>
            </conditional>
        </when>
    </conditional>
</inputs>
|
|
<outputs>
    <!-- Exactly one of the two fasta outputs is created; the filters mirror the input_format selector. -->
    <data name="output_fasta_ptcgf" format="ptcgf">
        <filter>input_format_cond['input_format'] == 'ptcgf'</filter>
    </data>
    <data name="output_fasta_ptcgfcs" format="ptcgfcs">
        <filter>input_format_cond['input_format'] == 'ptcgfcs'</filter>
    </data>
    <!-- Created only under the same conditions in which the command sets generate_output_aln. -->
    <data name="output_aln" format="txt" label="Multiple alignments on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments'] == 'yes'</filter>
    </data>
    <!-- Created only under the same conditions in which the command sets generate_output_tree. -->
    <data name="output_tree" format="ptpt" label="Phylogenetic trees on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['phylogenetic_trees_cond']['phylogenetic_trees'] == 'yes'</filter>
    </data>
</outputs>
|
|
<tests>
    <!-- TODO: placeholder only; this test declares no params or expected outputs, so it verifies nothing. -->
    <test>
    </test>
</tests>
|
|
<help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies.  It performs phylogenomic
analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the
**Classify gene sequences into precomputed orthologous gene family clusters** tool.

-----

**Options**

 * **Required options**

   - **Select gene family clusters** - Sequences classified into gene family clusters, optionally including corresponding coding sequences.
   - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
   - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters).

 * **Multiple sequence alignments options**

   - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments.
   - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptcgfcs'.
   - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments.
   - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference.
   - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptcgfcs' or this tool will produce an error.

 * **Phylogenetic trees options**

   - **Phylogenetic trees inference method** - Phylogenetic trees inference method.
   - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup.
   - **History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees** - History item containing a set of string fragments matching sequence identifiers of species in the classification (including scaffold taxa) to be used for determining the most basal taxa in the orthogroups for rooting trees.  The set of string fragments must be listed in decreasing order from older to younger lineages.
   - **Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree** - Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree.
   - **Maximum number of sequences in orthogroup alignments** - Maximum number of sequences in orthogroup alignments.
   - **Minimum number of sequences in orthogroup alignments** - Minimum number of sequences in orthogroup alignments.

 * **MSA quality control options**

   - **Remove sequences with gaps of** - Removes gappy sequences in alignments (i.e., 0.5 removes sequences with 50% gaps).
   - **Select process used for gap trimming** - Either nucleotide based trimming or alignments are trimmed using trimAl's ML heuristic trimming approach.
   - **Remove sites in alignments with gaps of** - If the process used for gap trimming is nucleotide based, this is the gap value used when removing gappy sites in alignments (i.e., 0.1 removes sites with 90% gaps).

</help>
|
|
<citations>
    <!-- TODO: placeholder entry; title and year are literally "None" and should be filled in. -->
    <citation type="bibtex">
        @unpublished{None,
            author = {Eric Wafula},
            title = {None},
            year = {None},
            url = {https://github.com/dePamphilis/PlantTribes}
        }</citation>
    <citation type="doi">10.1093/bioinformatics/btw412</citation>
    <!-- The entries below use standard BibTeX entry types, space-free keys and "and"-separated authors;
         the venue that used to be written as the citation key is now the booktitle/journal field. -->
    <citation type="bibtex">
        @inproceedings{Mirarab2014,
            author = {Mirarab, S. and Nguyen, N. and Warnow, T.},
            title = {PASTA: Ultra-Large Multiple Sequence Alignment},
            editor = {R. Sharan},
            booktitle = {Research in Computational Molecular Biology (RECOMB)},
            pages = {177--191},
            year = {2014},
            url = {https://github.com/smirarab/pasta}
        }</citation>
    <citation type="bibtex">
        @article{CapellaGutierrez2009,
            author = {Salvador Capella-Gutierrez and Jose M. Silla-Martinez and Toni Gabaldon},
            title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
            journal = {Bioinformatics},
            year = {2009},
            url = {http://trimal.cgenomics.org}
        }</citation>
    <citation type="bibtex">
        @article{Stamatakis2014,
            author = {A. Stamatakis},
            title = {RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
            journal = {Bioinformatics},
            year = {2014},
            url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html}
        }</citation>
    <citation type="doi">10.1371/journal.pone.0009490</citation>
</citations>
</tool>
|