Mercurial > repos > greg > phylogenomics_analysis
diff phylogenomics_analysis.xml @ 14:a8a4f6009451 draft
Uploaded
author | greg |
---|---|
date | Fri, 06 Jan 2017 13:58:59 -0500 |
parents | b37db5a8ed13 |
children | 34aa10ba20ef |
line wrap: on
line diff
--- a/phylogenomics_analysis.xml Thu Jan 05 15:15:47 2017 -0500 +++ b/phylogenomics_analysis.xml Fri Jan 06 13:58:59 2017 -0500 @@ -14,42 +14,64 @@ <![CDATA[ #import os #import sys - #set invalid_input = "The selected input is not a valid list of sequences classified into gene family clusters, select another input." - #if not $os.path.exists($orthogroup_faa.extra_files_path): - print > $sys.stderr($invalid_input) - $sys.exit(1) + + #set invalid_input_msg = "The selected input is not a valid list of sequences classified into gene family clusters. Select an input with the label 'Sequences classified into gene family clusters on...'." + #set no_coding_sequences_msg = "No corresponding coding sequences were generated for the selected input. You must not have selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input." + + #set input_extra_files_path = $orthogroup_faa.extra_files_path + #if not $os.path.exists($input_extra_files_path): + #raise Exception($invalid_input_msg) #end if - #set src_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta') - #set num_orthogroup_faa = 0 - #for f in $os.listdir($src_dir): - #if $f.endswith('.faa'): - #set num_orthogroup_faa = $num_orthogroup_faa + 1 - #end if - #end for + + #set num_orthogroup_faa = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.faa')]) #if $num_orthogroup_faa == 0: - print > $sys.stderr($invalid_input) - $sys.exit(1) + #raise Exception($invalid_input_msg) + #end if + + #set num_orthogroup_fna = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.fna')]) + + #set src_fasta_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta') + #set dest_fasta_dir = $output_fasta.extra_files_path + + #if str($options_type.options_type_selector) == 'advanced' and str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes' and 
str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond.'multiple_sequence_alignments_option == 'pasta_alignments': + #set generate_output_aln = True + #set src_aln_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_aln') + #set dest_aln_dir = $output_aln.extra_files_path + mkdir -p $dest_aln_dir && + #else: + #set generate_output_aln = False #end if - #set num_orthogroup_fna = 0 - #for f in $os.listdir($src_dir): - #if $f.endswith('.fna'): - #set num_orthogroup_fna = $num_orthogroup_fna + 1 - #end if - #end for - #set dest_dir = $output.extra_files_path - mkdir -p $dest_dir && + + #if str($options_type.options_type_selector) == 'advanced' and str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes': + #set generate_output_tree = True + #set src_tree_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_tree') + #set dest_tree_dir = $output_tree.extra_files_path + mkdir -p $dest_tree_dir && + #else: + #set generate_output_tree = False + #end if + + mkdir -p $dest_fasta_dir && + PhylogenomicsAnalysis + --num_threads \${GALAXY_SLOTS:-4} --orthogroup_faa "$orthogroup_faa.extra_files_path" + --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" --scaffold "$scaffold" --method $method + #if str($options_type.options_type_selector) == 'advanced': #if str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes': #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond #set multiple_sequence_alignments_option = $multiple_sequence_alignments_option_cond.multiple_sequence_alignments_option - #set multiple_codon_alignments_cond = $options_type.multiple_sequence_alignments_cond.multiple_codon_alignments_cond - #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments - #set use_corresponding_coding_sequences_cond = 
$multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond - #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences + #set input_includes_coding_sequences_cond = $options_type.multiple_sequence_alignments_cond.input_includes_coding_sequences_cond + #set input_includes_coding_sequences = $input_includes_coding_sequences_cond.input_includes_coding_sequences + #if str($input_includes_coding_sequences) == 'yes': + #set multiple_codon_alignments_cond = $input_includes_coding_sequences_cond.multiple_codon_alignments_cond + #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments + #set use_corresponding_coding_sequences_cond = $multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond + #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences + #end if #if str($multiple_sequence_alignments_option) == 'create_alignments': --create_alignments #else if str($multiple_sequence_alignments_option) == 'add_alignments': @@ -58,29 +80,36 @@ --pasta_alignments --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit #end if - #if str($multiple_codon_alignments) == 'yes': - --codon_alignments - #if str($use_corresponding_coding_sequences) == 'yes': - #if $num_orthogroup_fna == 0: - print > $sys.stderr("No corresponding coding sequences were generated for the selected input. You must not have selected 'Yes' for the 'Create corresponding coding sequences?' 
option in the GeneFamilyClassifier tool for the selected input.") - $sys.exit(1) + #if str($input_includes_coding_sequences) == 'yes': + #if str($multiple_codon_alignments) == 'yes': + --codon_alignments + #if str($use_corresponding_coding_sequences) == 'yes': + #if $num_orthogroup_fna == 0: + #raise Exception($no_coding_sequences_msg) + #end if + --orthogroup_fna #end if - --orthogroup_fna + --sequence_type $multiple_codon_alignments_cond.sequence_type #end if - --sequence_type $multiple_codon_alignments_cond.sequence_type #end if #end if + #if str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes': #set tree_inference_cond = $options_type.phylogenetic_trees_cond.tree_inference_cond #set tree_inference_method = $tree_inference_cond.tree_inference --tree_inference $tree_inference_method #if str($tree_inference_method) == 'raxml': - --rooting_order "$tree_inference_cond.rooting_order" + #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond + #set rooting_order_file = $rooting_order_file_cond.rooting_order_file + #if str($rooting_order_file) == 'yes': + --rooting_order "$rooting_order_file.rooting_order" + #end if --bootstrap_replicates $tree_inference_cond.bootstrap_replicates #end if --max_orthogroup_size $options_type.phylogenetic_trees_cond.max_orthogroup_size --min_orthogroup_size $options_type.phylogenetic_trees_cond.min_orthogroup_size #end if + #if str($options_type.msa_quality_control_options_cond.msa_quality_control_options) == 'yes': #set msa_quality_control_options_cond = $options_type.msa_quality_control_options_cond #set remove_gappy_sequences_cond = $msa_quality_control_options_cond.remove_gappy_sequences_cond @@ -95,11 +124,22 @@ #end if #end if #end if - --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" - --num_threads \${GALAXY_SLOTS:-4} - && echo "Sequence alignments and corresponding phylogenies: `ls $src_dir | wc -l` files" > $output - && ls $src_dir >> $output - && mv $src_dir/* $dest_dir + + 
&& echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | wc -l` files" > $output_fasta + && ls $src_fasta_dir >> $output_fasta + && mv $src_fasta_dir/* $dest_fasta_dir + + #if $generate_output_aln: + && echo "Sequence alignments and corresponding phylogenies using PASTA: `ls $src_aln_dir | wc -l` files" > $output_aln + && ls $src_aln_dir >> $output_aln + && mv $src_aln_dir/* $dest_aln_dir + #end if + + #if $generate_output_tree: + && echo "Phylogenetic trees: `ls $src_tree_dir | wc -l` files" > $output_tree + && ls $src_tree_dir >> $output_tree + && mv $src_tree_dir/* $dest_tree_dir + #end if ]]> </command> <inputs> @@ -128,7 +168,7 @@ <when value="no" /> <when value="yes"> <conditional name="multiple_sequence_alignments_option_cond"> - <param name="multiple_sequence_alignments_option" label="Select option" type="select" force_select="True"> + <param name="multiple_sequence_alignments_option" label="Select process for multiple sequence alignments" type="select" force_select="True"> <option value="create_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (MAFFT algorithm)</option> <option value="add_alignments">Add unaligned orthogroup proteins to scaffold backbone multiple sequence alignments (MAFFT algorithm)</option> <option value="pasta_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (PASTA algorithm)</option> @@ -139,26 +179,35 @@ <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iteration that the PASTA algorithm will run"/> </when> </conditional> - <conditional name="multiple_codon_alignments_cond"> - <param name="multiple_codon_alignments" label="Construct orthogroup multiple codon alignments?" 
type="select"> - <option value="no" selected="True">No</option> + <conditional name="input_includes_coding_sequences_cond"> + <param name="input_includes_coding_sequences" type="select" label="Input sequences include corresponding coding sequences?" help="Selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input"> + <option value="no" selected="true">No</option> <option value="yes">Yes</option> </param> + <when value="no"/> <when value="yes"> - <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference - (dna)"> - <option value="protein" selected="true">Amino acid based</option> - <option value="dna">Nucleotide based</option> - </param> - <conditional name="use_corresponding_coding_sequences_cond"> - <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences if available?" help="Must have selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input above"> - <option value="no" selected="true">No</option> + <conditional name="multiple_codon_alignments_cond"> + <param name="multiple_codon_alignments" label="Construct orthogroup multiple codon alignments?" 
type="select"> + <option value="no" selected="True">No</option> <option value="yes">Yes</option> </param> - <when value="no" /> - <when value="yes" /> + <when value="no"/> + <when value="yes"> + <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference - (dna)"> + <option value="protein" selected="true">Amino acid based</option> + <option value="dna">Nucleotide based</option> + </param> + <conditional name="use_corresponding_coding_sequences_cond"> + <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences?"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes" /> + </conditional> + </when> </conditional> </when> - <when value="no"/> </conditional> </when> </conditional> @@ -175,7 +224,16 @@ <option value="fasttree">FastTree</option> </param> <when value="raxml"> - <param name="rooting_order" format="txt" type="data" label="Select file containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees"/> + <conditional name="rooting_order_file_cond"> + <param name="rooting_order_file" type="select" label="Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="rooting_order" format="txt" type="data" label="History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees"/> + </when> + </conditional> <param name="bootstrap_replicates" type="integer" value="100" min="0" label="Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree"/> </when> <when value="fasttree"/> @@ 
-217,18 +275,16 @@ </conditional> </inputs> <outputs> - <data name="output" format="txt" label="Sequence alignments and corresponding phylogenies on ${on_string}"/> + <data name="output_fasta" format="txt" label="Sequence alignments and corresponding phylogenies on ${on_string}"/> + <data name="output_aln" format="txt" label="Sequence alignments and corresponding phylogenies using PASTA on ${on_string}"> + <filter>options_type['options_type_selector'] == 'advanced' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments'] == 'yes' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments_option_cond']['multiple_sequence_alignments_option'] == 'pasta_alignments'</filter> + </data> + <data name="output_tree" format="txt" label="Phylogenetic trees on ${on_string}"> + <filter>options_type['options_type_selector'] == 'advanced' and options_type['phylogenetic_trees_cond']['phylogenetic_trees'] == 'yes'</filter> + </data> </outputs> <tests> <test> - <param name="input" value="" ftype="fasta" /> - <param name="prediction_method" value="transdecoder"/> - <param name="target_gene_family_assembly" value="no"/> - <param name="strand_specific" value="yes"/> - <param name="dereplicate" value="yes"/> - <param name="min_length" value="200"/> - <output_collection name="orthos" type="list"> - </output_collection> </test> </tests> <help> @@ -240,14 +296,30 @@ **Options** + * **Select sequences classified into gene family clusters** - A history item with the label 'Sequences classified into gene family clusters on...'. * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL. - * **Protein classification method** - One of blastp, HMMScan or both. - * **SuperOrthogroups MCL clustering** - blastp e-value matrix between all pairs of orthogroups. 
- * **Minumum single copy taxa required in orthogroup** - Used with "Minumum single copy taxa required in orthogroup" configuration only. - * **Minumum taxa required in single copy orthogroup** - Used with "Minumum single copy taxa required in orthogroup" configuration only. - * **Corresponding coding sequences (CDS) fasta file** - Used only when selecting "Create orthogroup fasta files?". - + + * **Multiple sequence alignments options** + + - **Select process for multiple sequence alignments** - Process used for setting multiple sequence alignments. + - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create orthogroup fasta files?' option on the GeneFamilyClassifier tool when generating the selected input. This tool will produce an error if 'No' was selected. + - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments. + - **Sequence type used in the phylogenetic inference** - Sequence type used in the phylogenetic inference. + - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create corresponding coding sequences?' option on the GeneFamilyClassifier tool when generating the selected input. This tool will produce an error if 'No' was selected. + + * **Phylogenetic trees options** + - **Phylogenetic trees inference method** - Phylogenetic trees inference method. + - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup. 
+ - **History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees** - History item containing a set of string fragments matching sequence identifiers of species in the classification (including scaffold taxa) to be used for determining the most basal taxa in the orthogroups for rooting trees. The set of string fragments must be listed in decreasing order from older to younger lineages. + - **Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree** - Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree. + - **Maximum number of sequences in orthogroup alignments** - Maximum number of sequences in orthogroup alignments. + - **Minimum number of sequences in orthogroup alignments** - Minimum number of sequences in orthogroup alignments. + +* **MSA quality control options** + + - **Remove gappy sequences in alignments?** - Removes gappy sites in alignments (e.g. 0.1 removes sites with 90% gaps). + </help> <citations> <citation type="bibtex">