comparison phylogenomics_analysis.xml @ 42:7e92ab9ee98a draft

Uploaded
author greg
date Fri, 03 Feb 2017 13:35:26 -0500
parents 6b547036c627
children 923a54b9425c
comparison
equal deleted inserted replaced
41:6b547036c627 42:7e92ab9ee98a
1 <tool id="plant_tribes_phylogenomics_analysis" name="Perform phylogenomic analyses" version="0.3"> 1 <tool id="plant_tribes_phylogenomics_analysis" name="Create multiple sequence alignments" version="0.3">
2 <description>by creating multiple sequence alignments and inferred maximum likelihood phylogenies</description> 2 <description>and inferred maximum likelihood phylogenies</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.3">plant_tribes_phylogenomics_analysis</requirement> 4 <requirement type="package" version="0.3">plant_tribes_phylogenomics_analysis</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <!-- Anything other than zero is an error --> 7 <!-- Anything other than zero is an error -->
14 <command> 14 <command>
15 <![CDATA[ 15 <![CDATA[
16 #import os 16 #import os
17 #import sys 17 #import sys
18 18
19 #set scaffold_path = $scaffold.fields.path 19 #set input_format = $input_format_cond.input_format
20 #set scaffold_dir = $os.path.split($scaffold_path)[0] 20 #set pipeline_output_dir = 'phylogenomicsAnalysis_dir'
21 #set scaffold_selection = $os.path.split($scaffold_path)[1] 21 #set scaffold_path = $scaffold.fields.path
22 #set src_fasta_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta') 22 #set scaffold_dir = $os.path.split($scaffold_path)[0]
23 #if str($input.ext) == 'ptcgf': 23 #set scaffold_selection = $os.path.split($scaffold_path)[1]
24 #set dest_fasta_dir = $output_fasta_ptcgf.extra_files_path 24 #set src_aln_dir = $os.path.join($pipeline_output_dir, 'orthogroups_aln')
25 else: 25 #set src_fasta_dir = $os.path.join($pipeline_output_dir, 'orthogroups_fasta')
26 #set dest_fasta_dir =$output_fasta_ptcgfcs.extra_files_path 26 #set src_tree_dir = $os.path.join($pipeline_output_dir, 'orthogroups_tree')
27 #end if 27
28 28 #if str($input_format) == 'ptcgf':
29 #if str($options_type.options_type_selector) == 'advanced' and str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes': 29 #set dest_fasta_dir = $output_fasta_ptcgf.extra_files_path
30 #set generate_output_aln = True 30 #set multiple_codon_alignments = 'no'
31 #set src_aln_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_aln') 31 #set use_corresponding_coding_sequences = 'no'
32 #set dest_aln_dir = $output_aln.extra_files_path
33 mkdir -p $dest_aln_dir &&
34 #else: 32 #else:
35 #set generate_output_aln = False 33 ## str($input_format) == 'ptcgfcs':
36 #end if 34 #set dest_fasta_dir = $output_fasta_ptcgfcs.extra_files_path
35 #set multiple_codon_alignments_cond = $input_format_cond.multiple_codon_alignments_cond
36 #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments
37 #if str($multiple_codon_alignments) == 'yes':
38 #set use_corresponding_coding_sequences_cond = $multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond
39 #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences
40 #else:
41 #set use_corresponding_coding_sequences = 'no'
42 #end if
43 #end if
44 mkdir -p $dest_fasta_dir &&
45
46 #if str($options_type.options_type_selector) == 'advanced':
47 #set multiple_sequence_alignments_cond = $options_type.multiple_sequence_alignments_cond
48 #set multiple_sequence_alignments = $multiple_sequence_alignments_cond.multiple_sequence_alignments
49 #if str($multiple_sequence_alignments) == 'yes':
50 #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond
51 #set multiple_sequence_alignments_option = $multiple_sequence_alignments_option_cond.multiple_sequence_alignments_option
52 #set generate_output_aln = True
53 #set dest_aln_dir = $output_aln.extra_files_path
54 mkdir -p $dest_aln_dir &&
55 #else:
56 #set generate_output_aln = False
57 #end if
37 58
38 #if str($options_type.options_type_selector) == 'advanced' and str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes': 59 #if str($options_type.options_type_selector) == 'advanced' and str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes':
39 #set generate_output_tree = True 60 #set generate_output_tree = True
40 #set src_tree_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_tree')
41 #set dest_tree_dir = $output_tree.extra_files_path 61 #set dest_tree_dir = $output_tree.extra_files_path
42 mkdir -p $dest_tree_dir && 62 mkdir -p $dest_tree_dir &&
43 #else: 63 #else:
44 #set generate_output_tree = False 64 #set generate_output_tree = False
45 #end if 65 #end if
46
47 mkdir -p $dest_fasta_dir &&
48 66
49 PhylogenomicsAnalysis 67 PhylogenomicsAnalysis
50 --num_threads \${GALAXY_SLOTS:-4} 68 --num_threads \${GALAXY_SLOTS:-4}
51 --orthogroup_faa '$input.extra_files_path' 69 --orthogroup_faa '$input.extra_files_path'
52 --scaffold_dir '$scaffold_dir' 70 --scaffold_dir '$scaffold_dir'
53 --scaffold '$scaffold_selection' 71 --scaffold '$scaffold_selection'
54 --method $method 72 --method $method
55 73
56 #if str($options_type.options_type_selector) == 'advanced': 74 #if str($options_type.options_type_selector) == 'advanced':
57 #if str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes': 75 #if str($multiple_sequence_alignments) == 'yes':
58 #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond
59 #set multiple_sequence_alignments_option = $multiple_sequence_alignments_option_cond.multiple_sequence_alignments_option
60 #if str($multiple_sequence_alignments_option) == 'create_alignments':
61 #set multiple_sequence_alignments_option_str = 'Create alignments (MAFFT algorithm)'
62 #else if str($multiple_sequence_alignments_option) == 'add_alignments':
63 #set multiple_sequence_alignments_option_str = 'Add proteins to alignments (MAFFT algorithm)'
64 #else if str($multiple_sequence_alignments_option) == 'pasta_alignments':
65 #set multiple_sequence_alignments_option_str = 'Create alignments (PASTA algorithm)'
66 #end if
67
68 #set input_includes_coding_sequences_cond = $options_type.multiple_sequence_alignments_cond.input_includes_coding_sequences_cond
69 #set input_includes_coding_sequences = $input_includes_coding_sequences_cond.input_includes_coding_sequences
70 #if str($input_includes_coding_sequences) == 'yes':
71 #set multiple_codon_alignments_cond = $input_includes_coding_sequences_cond.multiple_codon_alignments_cond
72 #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments
73 #if str($multiple_codon_alignments) == 'yes':
74 #set use_corresponding_coding_sequences_cond = $multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond
75 #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences
76 #else:
77 #set use_corresponding_coding_sequences = 'no'
78 #end if
79 #else:
80 #set multiple_codon_alignments = 'no'
81 #set use_corresponding_coding_sequences = 'no'
82 #end if
83
84 #if str($multiple_sequence_alignments_option) == 'create_alignments': 76 #if str($multiple_sequence_alignments_option) == 'create_alignments':
85 --create_alignments 77 --create_alignments
86 #else if str($multiple_sequence_alignments_option) == 'add_alignments': 78 #else if str($multiple_sequence_alignments_option) == 'add_alignments':
87 --add_alignments 79 --add_alignments
88 #else if str($multiple_sequence_alignments_option) == 'pasta_alignments': 80 #else if str($multiple_sequence_alignments_option) == 'pasta_alignments':
89 --pasta_script_path '$__tool_directory__/run_pasta.py' 81 --pasta_script_path '$__tool_directory__/run_pasta.py'
90 --pasta_alignments 82 --pasta_alignments
91 --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit 83 --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit
92 #end if
93
94 #if str($input_includes_coding_sequences) == 'yes':
95 #if str($multiple_codon_alignments) == 'yes':
96 --codon_alignments
97 #if str($use_corresponding_coding_sequences) == 'yes':
98 #if str($input.ext) == 'ptcgfcs):
99 --orthogroup_fna
100 #else:
101 raise Exception("Selected input does not include corresponding coding sequences")
102 #end if
103 #end if
104 --sequence_type $multiple_codon_alignments_cond.sequence_type
105 #end if
106 #end if 84 #end if
107 #end if 85 #end if
108 86
109 #if str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes': 87 #if str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes':
110 #set tree_inference_cond = $options_type.phylogenetic_trees_cond.tree_inference_cond 88 #set tree_inference_cond = $options_type.phylogenetic_trees_cond.tree_inference_cond
135 #end if 113 #end if
136 #end if 114 #end if
137 #end if 115 #end if
138 #end if 116 #end if
139 117
140 #if str($input.ext) == 'ptcgf': 118 #if str($input_format) == 'ptcgf':
141 && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > $output_fasta_ptcgf 119 && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > $output_fasta_ptcgf
142 && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgf 120 && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgf
143 #else: 121 #else:
144 && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > $output_fasta_ptcgfcs 122 && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > $output_fasta_ptcgfcs
145 && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgfcs 123 && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgfcs
146 #end if 124 #end if
125
147 && mv $src_fasta_dir/* $dest_fasta_dir || true 126 && mv $src_fasta_dir/* $dest_fasta_dir || true
148 127
149 #if $generate_output_aln: 128 #if $generate_output_aln:
150 && echo "Multiple sequence alignments using method '$multiple_sequence_alignments_option' (note that some files may be empty): `ls $src_aln_dir | grep f | wc -l` files" > $output_aln 129 && echo "Multiple sequence alignments using method '$multiple_sequence_alignments_option' (note that some files may be empty): `ls $src_aln_dir | grep f | wc -l` files" > $output_aln
151 && ls -al $src_aln_dir | grep f >> $output_aln 130 && ls -al $src_aln_dir | grep f >> $output_aln
158 && mv $src_tree_dir/* $dest_tree_dir || true 137 && mv $src_tree_dir/* $dest_tree_dir || true
159 #end if 138 #end if
160 ]]> 139 ]]>
161 </command> 140 </command>
162 <inputs> 141 <inputs>
163 <param name="input" format="ptcgf,ptcgfcs" type="data" label="Select gene family clusters" help="Selection may optionally include corresponding coding sequences"/> 142 <conditional name="input_format_cond">
143 <param name="input_format" type="select" label="Select type of data to sub sample">
144 <option value="ptcgf">Gene family clusters</option>
145 <option value="ptcgfcs">Gene family clusters and corresponding coding sequences</option>
146 </param>
147 <when value="ptcgf">
148 <param name="input_ptcgf" format="ptcgf" type="data" label="Gene family clusters"/>
149 </when>
150 <when value="ptcgfcs">
151 <param name="input_ptcgfcs" format="ptcgfcs" type="data" label="Gene family clusters with corresponding coding sequences"/>
152 <conditional name="multiple_codon_alignments_cond">
153 <param name="multiple_codon_alignments" type="select" label="Construct orthogroup multiple codon alignments?">
154 <option value="no" selected="True">No</option>
155 <option value="yes">Yes</option>
156 </param>
157 <when value="no"/>
158 <when value="yes">
159 <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference - (dna)">
160 <option value="protein" selected="true">Amino acid based</option>
161 <option value="dna">Nucleotide based</option>
162 </param>
163 <conditional name="use_corresponding_coding_sequences_cond">
164 <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences?">
165 <option value="no" selected="true">No</option>
166 <option value="yes">Yes</option>
167 </param>
168 <when value="no" />
169 <when value="yes" />
170 </conditional>
171 </when>
172 </conditional>
173 </when>
174 </conditional>
164 <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold"> 175 <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
165 <options from_data_table="plant_tribes_scaffolds" /> 176 <options from_data_table="plant_tribes_scaffolds" />
166 <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/> 177 <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
167 </param> 178 </param>
168 <param name="method" type="select" label="Protein clustering method"> 179 <param name="method" type="select" label="Protein clustering method">
169 <option value="gfam" selected="true">GFam</option> 180 <option value="gfam" selected="true">GFam</option>
170 <option value="orthofinder">OrthoFinder</option> 181 <option value="orthofinder">OrthoFinder</option>
171 <option value="orthomcl">OrthoMCL</option> 182 <option value="orthomcl">OrthoMCL</option>
192 </param> 203 </param>
193 <when value="create_alignments"/> 204 <when value="create_alignments"/>
194 <when value="add_alignments"/> 205 <when value="add_alignments"/>
195 <when value="pasta_alignments"> 206 <when value="pasta_alignments">
196 <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iteration that the PASTA algorithm will run"/> 207 <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iteration that the PASTA algorithm will run"/>
197 </when>
198 </conditional>
199 <conditional name="input_includes_coding_sequences_cond">
200 <param name="input_includes_coding_sequences" type="select" label="Input sequences include corresponding coding sequences?" help="Selected input data format must be 'pgfccs'">
201 <option value="no" selected="true">No</option>
202 <option value="yes">Yes</option>
203 </param>
204 <when value="no"/>
205 <when value="yes">
206 <conditional name="multiple_codon_alignments_cond">
207 <param name="multiple_codon_alignments" label="Construct orthogroup multiple codon alignments?" type="select">
208 <option value="no" selected="True">No</option>
209 <option value="yes">Yes</option>
210 </param>
211 <when value="no"/>
212 <when value="yes">
213 <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference - (dna)">
214 <option value="protein" selected="true">Amino acid based</option>
215 <option value="dna">Nucleotide based</option>
216 </param>
217 <conditional name="use_corresponding_coding_sequences_cond">
218 <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences?">
219 <option value="no" selected="true">No</option>
220 <option value="yes">Yes</option>
221 </param>
222 <when value="no" />
223 <when value="yes" />
224 </conditional>
225 </when>
226 </conditional>
227 </when> 208 </when>
228 </conditional> 209 </conditional>
229 </when> 210 </when>
230 </conditional> 211 </conditional>
231 <conditional name="phylogenetic_trees_cond"> 212 <conditional name="phylogenetic_trees_cond">
290 </conditional> 271 </conditional>
291 </when> 272 </when>
292 </conditional> 273 </conditional>
293 </inputs> 274 </inputs>
294 <outputs> 275 <outputs>
295 <data name="output_fasta_ptcgf" format="ptcgf" label="Sequence alignments and corresponding phylogenies on ${on_string}"> 276 <data name="output_fasta_ptcgf" format="ptcgf">
296 <filter>input.ext == 'ptcgf'</filter> 277 <filter>input_format_cond['input_format'] == 'ptcgf'</filter>
297 </data> 278 </data>
298 <data name="output_fasta_ptcgfcs" format="ptcgfcs" label="Sequence alignments and corresponding phylogenies on ${on_string}"> 279 <data name="output_fasta_ptcgfcs" format="ptcgfcs">
299 <filter>input.ext == 'ptcgfcs'</filter> 280 <filter>input_format_cond['input_format'] == 'ptcgfcs'</filter>
300 </data> 281 </data>
301 <data name="output_aln" format="txt" label="Multiple alignments on ${on_string}"> 282 <data name="output_aln" format="txt" label="Multiple alignments on ${on_string}">
302 <filter>options_type['options_type_selector'] == 'advanced' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments'] == 'yes'</filter> 283 <filter>options_type['options_type_selector'] == 'advanced' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments'] == 'yes'</filter>
303 </data> 284 </data>
304 <data name="output_tree" format="ptpt" label="Phylogenetic trees on ${on_string}"> 285 <data name="output_tree" format="ptpt" label="Phylogenetic trees on ${on_string}">