# HG changeset patch # User greg # Date 1486053532 18000 # Node ID 0bf214a0646a4148e145e19756ae1db464bf95c8 # Parent 53f404994667645b10275c8c19be0950fbd8a4eb Uploaded diff -r 53f404994667 -r 0bf214a0646a phylogenomics_analysis.xml --- a/phylogenomics_analysis.xml Fri Jan 27 15:03:32 2017 -0500 +++ b/phylogenomics_analysis.xml Thu Feb 02 11:38:52 2017 -0500 @@ -6,7 +6,8 @@ - + + @@ -15,21 +16,9 @@ #import os #import sys - #set invalid_input_msg = "The selected input is not a valid list of sequences classified into gene family clusters. Select an input with the label 'Sequences classified into gene family clusters on...'." - #set no_coding_sequences_msg = "No corresponding coding sequences were generated for the selected input. You must not have selected 'Yes' for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool for the selected input." - - #set input_extra_files_path = $orthogroup_faa.extra_files_path - #if not $os.path.exists($input_extra_files_path): - #raise Exception($invalid_input_msg) - #end if - - #set num_orthogroup_faa = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.faa')]) - #if $num_orthogroup_faa == 0: - #raise Exception($invalid_input_msg) - #end if - - #set num_orthogroup_fna = $len([f for f in $os.listdir($input_extra_files_path) if f.endswith('.fna')]) - + #set scaffold_path = $scaffold.fields.path + #set scaffold_dir = $os.path.split($scaffold_path)[0] + #set scaffold_selection = $os.path.split($scaffold_path)[1] #set src_fasta_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta') #set dest_fasta_dir = $output_fasta.extra_files_path @@ -55,9 +44,9 @@ PhylogenomicsAnalysis --num_threads \${GALAXY_SLOTS:-4} - --orthogroup_faa "$orthogroup_faa.extra_files_path" - --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds" - --scaffold "$scaffold" + --orthogroup_faa '$input.extra_files_path' + --scaffold_dir '$scaffold_dir' + --scaffold '$scaffold_selection' --method 
$method #if str($options_type.options_type_selector) == 'advanced': @@ -93,7 +82,7 @@ #else if str($multiple_sequence_alignments_option) == 'add_alignments': --add_alignments #else if str($multiple_sequence_alignments_option) == 'pasta_alignments': - --pasta_script_path "$__tool_directory__/run_pasta.py" + --pasta_script_path '$__tool_directory__/run_pasta.py' --pasta_alignments --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit #end if @@ -102,10 +91,11 @@ #if str($multiple_codon_alignments) == 'yes': --codon_alignments #if str($use_corresponding_coding_sequences) == 'yes': - #if $num_orthogroup_fna == 0: - #raise Exception($no_coding_sequences_msg) - #end if - --orthogroup_fna + #if str($input.ext) == 'pgfccs': + --orthogroup_fna + #else: + #raise Exception("Selected input does not include corresponding coding sequences") + #end if #end if --sequence_type $multiple_codon_alignments_cond.sequence_type #end if @@ -120,7 +110,7 @@ #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond #set rooting_order_file = $rooting_order_file_cond.rooting_order_file #if str($rooting_order_file) == 'yes': - --rooting_order "$rooting_order_file.rooting_order" + --rooting_order '$rooting_order_file.rooting_order' #end if --bootstrap_replicates $tree_inference_cond.bootstrap_replicates #end if @@ -161,7 +151,7 @@ ]]> - + @@ -198,7 +188,7 @@ - + @@ -308,7 +298,7 @@ This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. 
It performs phylogenomic -analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produces by the +analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the **Classify gene sequences into precomputed orthologous gene family clusters** tool. ----- @@ -317,17 +307,17 @@ * **Required options** - - **Select sequences classified into gene family clusters** - A history item with the label 'Sequences classified into gene family clusters on...'. + - **Select gene family clusters** - Sequences classified into gene family clusters, optionally including corresponding coding sequences. - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters). * **Multiple sequence alignments options** - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments. - - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create orthogroup fasta files?' option on the 'Classify gene sequences into precomputed orthologous gene family clusters' tool when generating the selected input. This tool will produce an error if 'No' was selected. + - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'pgfccs'. - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments. - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference. 
- - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that 'Yes' was selected for the 'Create corresponding coding sequences?' option on the 'Classify gene sequences into precomputed orthologous gene family clusters' tool when generating the selected input. This tool will produce an error if 'No' was selected. + - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'pgfccs'; otherwise this tool will produce an error. * **Phylogenetic trees options** @@ -351,8 +341,30 @@ author = {Eric Wafula}, title = {None}, year = {None}, - eprint = {None}, - url = {None} + url = {https://github.com/dePamphilis/PlantTribes} + } + 10.1093/bioinformatics/btw412 + + @published{Research in Computational Molecular Biology (RECOMB) (pp. 177–191), + author = {Mirarab, S., Nguyen, N., Warnow, T.}, + title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)}, + year = {2014}, + url = {https://github.com/smirarab/pasta} } + + @published{Bioinformatics, + author = {Salvador Capella-Gutierrez, Jose M. Silla-Martinez, Toni Gabaldon}, + title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses}, + year = {2009}, + url = {http://trimal.cgenomics.org} + } + + @published{Bioinformatics, + author = {A. Stamatakis}, + title = {RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies}, + year = {2014}, + url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html} + } + 10.1371/journal.pone.0009490