comparison phylogenomics_analysis.xml @ 40:0bf214a0646a draft

Uploaded
author greg
date Thu, 02 Feb 2017 11:38:52 -0500
parents 53f404994667
children 6b547036c627
comparison
equal deleted inserted replaced
39:53f404994667 40:0bf214a0646a
4 <requirement type="package" version="0.3">plant_tribes_phylogenomics_analysis</requirement> 4 <requirement type="package" version="0.3">plant_tribes_phylogenomics_analysis</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <!-- Anything other than zero is an error --> 7 <!-- Anything other than zero is an error -->
8 <exit_code range="1:" /> 8 <exit_code range="1:" />
9 <!-- In case the return code has not been set properly check stderr too --> 9 <exit_code range=":-1" />
10 <!-- In case the return code has not been set properly check stderr too -->
10 <regex match="Error:" /> 11 <regex match="Error:" />
11 <regex match="Exception:" /> 12 <regex match="Exception:" />
12 </stdio> 13 </stdio>
13 <command> 14 <command>
14 <![CDATA[ 15 <![CDATA[
15 #import os 16 #import os
16 #import sys 17 #import sys
17 18
18 #set invalid_input_msg = "The selected input is not a valid list of sequences classified into gene family clusters. Select an input with the label 'Sequences classified into gene family clusters on...'." 19 #set scaffold_path = $scaffold.fields.path
19 #set no_coding_sequences_msg = "No corresponding coding sequences were generated for the selected input. You must not have selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input." 20 #set scaffold_dir = $os.path.split($scaffold_path)[0]
20 21 #set scaffold_selection = $os.path.split($scaffold_path)[1]
21 #set input_extra_files_path = $orthogroup_faa.extra_files_path
22 #if not $os.path.exists($input_extra_files_path):
23 #raise Exception($invalid_input_msg)
24 #end if
25
26 #set num_orthogroup_faa = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.faa')])
27 #if $num_orthogroup_faa == 0:
28 #raise Exception($invalid_input_msg)
29 #end if
30
31 #set num_orthogroup_fna = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.fna')])
32
33 #set src_fasta_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta') 22 #set src_fasta_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta')
34 #set dest_fasta_dir = $output_fasta.extra_files_path 23 #set dest_fasta_dir = $output_fasta.extra_files_path
35 24
36 #if str($options_type.options_type_selector) == 'advanced' and str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes': 25 #if str($options_type.options_type_selector) == 'advanced' and str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes':
37 #set generate_output_aln = True 26 #set generate_output_aln = True
53 42
54 mkdir -p $dest_fasta_dir && 43 mkdir -p $dest_fasta_dir &&
55 44
56 PhylogenomicsAnalysis 45 PhylogenomicsAnalysis
57 --num_threads \${GALAXY_SLOTS:-4} 46 --num_threads \${GALAXY_SLOTS:-4}
58 --orthogroup_faa "$orthogroup_faa.extra_files_path" 47 --orthogroup_faa '$input.extra_files_path'
59 --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" 48 --scaffold_dir '$scaffold_dir'
60 --scaffold "$scaffold" 49 --scaffold '$scaffold_selection'
61 --method $method 50 --method $method
62 51
63 #if str($options_type.options_type_selector) == 'advanced': 52 #if str($options_type.options_type_selector) == 'advanced':
64 #if str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes': 53 #if str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes':
65 #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond 54 #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond
91 #if str($multiple_sequence_alignments_option) == 'create_alignments': 80 #if str($multiple_sequence_alignments_option) == 'create_alignments':
92 --create_alignments 81 --create_alignments
93 #else if str($multiple_sequence_alignments_option) == 'add_alignments': 82 #else if str($multiple_sequence_alignments_option) == 'add_alignments':
94 --add_alignments 83 --add_alignments
95 #else if str($multiple_sequence_alignments_option) == 'pasta_alignments': 84 #else if str($multiple_sequence_alignments_option) == 'pasta_alignments':
96 --pasta_script_path "$__tool_directory__/run_pasta.py" 85 --pasta_script_path '$__tool_directory__/run_pasta.py'
97 --pasta_alignments 86 --pasta_alignments
98 --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit 87 --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit
99 #end if 88 #end if
100 89
101 #if str($input_includes_coding_sequences) == 'yes': 90 #if str($input_includes_coding_sequences) == 'yes':
102 #if str($multiple_codon_alignments) == 'yes': 91 #if str($multiple_codon_alignments) == 'yes':
103 --codon_alignments 92 --codon_alignments
104 #if str($use_corresponding_coding_sequences) == 'yes': 93 #if str($use_corresponding_coding_sequences) == 'yes':
105 #if $num_orthogroup_fna == 0: 94 #if str($input.ext) == 'pgfccs':
106 #raise Exception($no_coding_sequences_msg) 95 --orthogroup_fna
107 #end if 96 #else:
108 --orthogroup_fna 97 #raise Exception("Selected input does not include corresponding coding sequences")
98 #end if
109 #end if 99 #end if
110 --sequence_type $multiple_codon_alignments_cond.sequence_type 100 --sequence_type $multiple_codon_alignments_cond.sequence_type
111 #end if 101 #end if
112 #end if 102 #end if
113 #end if 103 #end if
118 --tree_inference $tree_inference_method 108 --tree_inference $tree_inference_method
119 #if str($tree_inference_method) == 'raxml': 109 #if str($tree_inference_method) == 'raxml':
120 #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond 110 #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond
121 #set rooting_order_file = $rooting_order_file_cond.rooting_order_file 111 #set rooting_order_file = $rooting_order_file_cond.rooting_order_file
122 #if str($rooting_order_file) == 'yes': 112 #if str($rooting_order_file) == 'yes':
123 --rooting_order "$rooting_order_file.rooting_order" 113 --rooting_order '$rooting_order_file.rooting_order'
124 #end if 114 #end if
125 --bootstrap_replicates $tree_inference_cond.bootstrap_replicates 115 --bootstrap_replicates $tree_inference_cond.bootstrap_replicates
126 #end if 116 #end if
127 --max_orthogroup_size $options_type.phylogenetic_trees_cond.max_orthogroup_size 117 --max_orthogroup_size $options_type.phylogenetic_trees_cond.max_orthogroup_size
128 --min_orthogroup_size $options_type.phylogenetic_trees_cond.min_orthogroup_size 118 --min_orthogroup_size $options_type.phylogenetic_trees_cond.min_orthogroup_size
159 && mv $src_tree_dir/* $dest_tree_dir || true 149 && mv $src_tree_dir/* $dest_tree_dir || true
160 #end if 150 #end if
161 ]]> 151 ]]>
162 </command> 152 </command>
163 <inputs> 153 <inputs>
164 <param name="orthogroup_faa" format="txt" type="data" label="Select sequences classified into gene family clusters"/> 154 <param name="input" format="pgfc,pgfccs" type="data" label="Select gene family clusters" help="Selection may optionally include corresponding coding sequences"/>
165 <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold"> 155 <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
166 <options from_data_table="plant_tribes_scaffolds" /> 156 <options from_data_table="plant_tribes_scaffolds" />
167 <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/> 157 <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
168 </param> 158 </param>
169 <param name="method" type="select" label="Protein clustering method"> 159 <param name="method" type="select" label="Protein clustering method">
196 <when value="pasta_alignments"> 186 <when value="pasta_alignments">
197 <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iterations that the PASTA algorithm will run"/> 187 <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iterations that the PASTA algorithm will run"/>
198 </when> 188 </when>
199 </conditional> 189 </conditional>
200 <conditional name="input_includes_coding_sequences_cond"> 190 <conditional name="input_includes_coding_sequences_cond">
201 <param name="input_includes_coding_sequences" type="select" label="Input sequences include corresponding coding sequences?" help="Selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input"> 191 <param name="input_includes_coding_sequences" type="select" label="Input sequences include corresponding coding sequences?" help="Selected input data format must be 'pgfccs'">
202 <option value="no" selected="true">No</option> 192 <option value="no" selected="true">No</option>
203 <option value="yes">Yes</option> 193 <option value="yes">Yes</option>
204 </param> 194 </param>
205 <when value="no"/> 195 <when value="no"/>
206 <when value="yes"> 196 <when value="yes">
306 </test> 296 </test>
307 </tests> 297 </tests>
308 <help> 298 <help>
309 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of 299 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
310 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic 300 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic
311 analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produces by the 301 analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the
312 **Classify gene sequences into precomputed orthologous gene family clusters** tool. 302 **Classify gene sequences into precomputed orthologous gene family clusters** tool.
313 303
314 ----- 304 -----
315 305
316 **Options** 306 **Options**
317 307
318 * **Required options** 308 * **Required options**
319 309
320 - **Select sequences classified into gene family clusters** - A history item with the label 'Sequences classified into gene family clusters on...'. 310 - **Select gene family clusters** - Sequences classified into gene family clusters, optionally including corresponding coding sequences.
321 - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. 311 - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
322 - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters). 312 - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters).
323 313
324 * **Multiple sequence alignments options** 314 * **Multiple sequence alignments options**
325 315
326 - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments. 316 - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments.
327 - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create orthogroup fasta files?' option on the 'Classify gene sequences into precomputed orthologous gene family clusters' tool when generating the selected input. This tool will produce an error if 'No' was selected. 317 - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'pgfccs'.
328 - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments. 318 - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments.
329 - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference. 319 - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference.
330 - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create corresponding coding sequences?' option on the 'Classify gene sequences into precomputed orthologous gene family clusters' tool when generating the selected input. This tool will produce an error if 'No' was selected. 320 - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'pgfccs'; otherwise this tool will produce an error.
331 321
332 * **Phylogenetic trees options** 322 * **Phylogenetic trees options**
333 323
334 - **Phylogenetic trees inference method** - Phylogenetic trees inference method. 324 - **Phylogenetic trees inference method** - Phylogenetic trees inference method.
335 - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup. 325 - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup.
349 <citation type="bibtex"> 339 <citation type="bibtex">
350 @unpublished{None, 340 @unpublished{None,
351 author = {Eric Wafula}, 341 author = {Eric Wafula},
352 title = {None}, 342 title = {None},
353 year = {None}, 343 year = {None},
354 eprint = {None}, 344 url = {https://github.com/dePamphilis/PlantTribes}
355 url = {None}
356 }</citation> 345 }</citation>
346 <citation type="doi">10.1093/bioinformatics/btw412</citation>
347 <citation type="bibtex">
348 @published{Research in Computational Molecular Biology (RECOMB) (pp. 177–191),
349 author = {Mirarab, S., Nguyen, N., Warnow, T.},
350 title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)},
351 year = {2014},
352 url = {https://github.com/smirarab/pasta}
353 }</citation>
354 <citation type="bibtex">
355 @published{Bioinformatics,
356 author = {Salvador Capella-Gutierrez, Jose M. Silla-Martinez, Toni Gabaldon},
357 title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
358 year = {2009},
359 url = {http://trimal.cgenomics.org}
360 }</citation>
361 <citation type="bibtex">
362 @published{Bioinformatics,
363 author = {A. Stamatakis},
364 title = {RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
365 year = {2014},
366 url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html}
367 }</citation>
368 <citation type="doi">10.1371/journal.pone.0009490</citation>
357 </citations> 369 </citations>
358 </tool> 370 </tool>