# HG changeset patch # User greg # Date 1486735532 18000 # Node ID 32c3bf33cad7c6602adba6aa9b9d5e01adef845a # Parent ba0315c7b28b9294fc52ad27ef8cbed881addbd3 Uploaded diff -r ba0315c7b28b -r 32c3bf33cad7 phylogenomics_analysis.xml --- a/phylogenomics_analysis.xml Fri Feb 03 14:55:26 2017 -0500 +++ b/phylogenomics_analysis.xml Fri Feb 10 09:05:32 2017 -0500 @@ -1,7 +1,8 @@ and inferred maximum likelihood phylogenies - plant_tribes_phylogenomics_analysis + plant_tribes_phylogenomics_analysis + @@ -25,12 +26,12 @@ #set src_fasta_dir = $os.path.join($pipeline_output_dir, 'orthogroups_fasta') #set src_tree_dir = $os.path.join($pipeline_output_dir, 'orthogroups_tree') - #if str($input_format) == 'ptcgf': - #set dest_fasta_dir = $output_fasta_ptcgf.extra_files_path + #if str($input_format) == 'ptortho': + #set dest_fasta_dir = $output_fasta_ptortho.extra_files_path #set multiple_codon_alignments = 'no' #set use_corresponding_coding_sequences = 'no' #else: - #set dest_fasta_dir = $output_fasta_ptcgfcs.extra_files_path + #set dest_fasta_dir = $output_fasta_ptorthocs.extra_files_path #set multiple_codon_alignments_cond = $input_format_cond.multiple_codon_alignments_cond #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments #if str($multiple_codon_alignments) == 'yes': @@ -68,10 +69,10 @@ PhylogenomicsAnalysis --num_threads \${GALAXY_SLOTS:-4} - #if str($input_format) == 'ptcgf': - --orthogroup_faa '$input_format_cond.input_ptcgf.extra_files_path' + #if str($input_format) == 'ptortho': + --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path' #else: - --orthogroup_faa '$input_format_cond.input_ptcgfcs.extra_files_path' + --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path' #end if --scaffold_dir '$scaffold_dir' --scaffold '$scaffold_selection' @@ -121,25 +122,25 @@ #end if #end if - #if str($input_format) == 'ptcgf': - && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | 
wc -l` files" > $output_fasta_ptcgf - && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgf + && echo "Sequence alignments and corresponding phylogenies" + #if str($input_format) == 'ptortho': + && echo -e "#Size\tName" > $output_fasta_ptortho + && ls -l $src_fasta_dir | grep f | awk -F ' ' '{print $5"\t"$9}' >> $output_fasta_ptortho #else: - && echo "Sequence alignments and corresponding phylogenies: `ls $src_fasta_dir | grep f | wc -l` files" > $output_fasta_ptcgfcs - && ls -al $src_fasta_dir | grep f >> $output_fasta_ptcgfcs + && echo -e "#Size\tName" > $output_fasta_ptorthocs + && ls -l $src_fasta_dir | grep f | awk -F ' ' '{print $5"\t"$9}' >> $output_fasta_ptorthocs #end if - && mv $src_fasta_dir/* $dest_fasta_dir || true #if $generate_output_aln: - && echo "Multiple sequence alignments using method '$multiple_sequence_alignments_option' (note that some files may be empty): `ls $src_aln_dir | grep f | wc -l` files" > $output_aln - && ls -al $src_aln_dir | grep f >> $output_aln + && echo -e "#Size\tName" > $output_aln + && ls -l $src_aln_dir | grep f | awk -F ' ' '{print $5"\t"$9}' >> $output_aln && mv $src_aln_dir/* $dest_aln_dir || true #end if #if $generate_output_tree: - && echo "Phylogenetic trees (note that some files may be empty): `ls $src_tree_dir | grep f | wc -l` files" > $output_tree - && ls -al $src_tree_dir | grep f >> $output_tree + && echo -e "#Size\tName" > $output_tree + && ls -l $src_tree_dir | grep f | awk -F ' ' '{print $5"\t"$9}' >> $output_tree && mv $src_tree_dir/* $dest_tree_dir || true #end if ]]> @@ -147,22 +148,22 @@ - - + + - - + + - - + + - + - + @@ -180,7 +181,7 @@ - + @@ -202,15 +203,16 @@ - + - - + + - + @@ -233,17 +235,17 @@ - + - + - + - + - - + + @@ -258,17 +260,17 @@ - + - + - + - + - + @@ -279,13 +281,13 @@ - - input_format_cond['input_format'] == 'ptcgf' + + input_format_cond['input_format'] == 'ptortho' - - input_format_cond['input_format'] == 'ptcgfcs' + + input_format_cond['input_format'] == 'ptorthocs' - + 
options_type['options_type_selector'] == 'advanced' and options_type['multiple_sequence_alignments_cond']['multiple_sequence_alignments'] == 'yes' @@ -297,75 +299,82 @@ -This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of -complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic -analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the -**Classify gene sequences into precomputed orthologous gene family clusters** tool. + This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of + complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic + analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the + **Classify gene sequences into precomputed orthologous gene family clusters** tool. ------ + ----- -**Options** + **Options** -* **Required options** + * **Required options** - - **Select gene family clusters** - Sequences classified into gene family clusters, optionally including corresponding coding sequences. - - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. - - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters). - -* **Multiple sequence alignments options** + - **Select gene family clusters** - Sequences classified into gene family clusters, optionally including corresponding coding sequences. + - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data. 
+ - **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters). + * **Multiple sequence alignments options** - - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments. - - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptcgfcs'. - - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments. - - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference. - - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptcgfcs' or this tool will produce an error. + - **Select method for multiple sequence alignments** - Method used for setting multiple sequence alignments. + - **Input sequences include corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptorthocs'. + - **Construct orthogroup multiple codon alignments?** - Construct orthogroup multiple codon alignments. + - **Sequence type used in the phylogenetic inference** - Sequence type (dna or amino acid) used in the phylogenetic inference. + - **Use corresponding coding sequences?** - Selecting 'Yes' for this option requires that the selected input data format is 'ptorthocs' or this tool will produce an error. -* **Phylogenetic trees options** + * **Phylogenetic trees options** - - **Phylogenetic trees inference method** - Phylogenetic trees inference method. - - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup. 
- - **History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees** - History item containing a set of string fragments matching sequences identifiers of species in the classification (including scaffold taxa) to be used for determining the most basal taxa in the orthogroups for rooting trees. The set of string fragments must be listed in decreasing order from older to younger lineages. - - **Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree** - Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree. - - **Maximum number of sequences in orthogroup alignments** - Maximum number of sequences in orthogroup alignments. - - **Minimum number of sequences in orthogroup alignments** - Minimum number of sequences in orthogroup alignments. + - **Phylogenetic trees inference method** - Phylogenetic trees inference method. + - **Select history item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees?** - If 'No' is selected, trees will be rooted using the most distant taxon present in the orthogroup. + - **History item containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees** - History item containing a set of string fragments matching sequence identifiers of species in the classification (including scaffold taxa) to be used for determining the most basal taxa in the orthogroups for rooting trees. The set of string fragments must be listed in decreasing order from older to younger lineages. + - **Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree** - Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree. 
+ - **Maximum number of sequences in orthogroup alignments** - Maximum number of sequences in orthogroup alignments. + - **Minimum number of sequences in orthogroup alignments** - Minimum number of sequences in orthogroup alignments. -* **MSA quality control options** + * **MSA quality control options** - - **Remove sequences with gaps of** - Removes gappy sequences in alignments (i.e., 0.5 removes sequences with 50% gaps). - - **Select process used for gap trimming** - Either nucleotide based trimming or alignments are trimed using using trimAl's ML heuristic trimming approach. - - **Remove sites in alignments with gaps of** - If the process used for gap trimming is nucleotide based, this is the gap value used when removing gappy sites in alignments (i.e., 0.1 removes sites with 90% gaps). + - **Remove sequences with gaps of** - Removes gappy sequences in alignments (i.e., 0.5 removes sequences with 50% gaps). + - **Select process used for gap trimming** - Either nucleotide based trimming or alignments are trimmed using trimAl's ML heuristic trimming approach. + - **Remove sites in alignments with gaps of** - If the process used for gap trimming is nucleotide based, this is the gap value used when removing gappy sites in alignments (i.e., 0.1 removes sites with 90% gaps). - - @unpublished{None, - author = {Eric Wafula}, - title = {None}, - year = {None}, - url = {https://github.com/dePamphilis/PlantTribes} - } - 10.1093/bioinformatics/btw412 - - @published{Research in Computational Molecular Biology (RECOMB) (pp. 177–191), - author = {Mirarab, S., Nguyen, N., Warnow, T.}, - title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)}, - year = {2014}, - url = {https://github.com/smirarab/pasta} - } - - @published{Bioinformatics, - author = {Salvador Capella-Gutierrez, Jose M. 
Silla-Martinez, Toni Gabaldon}, - title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses}, - year = {2009}, - url = {http://trimal.cgenomics.org} - } - - @published{Bioinformatics, - author = {A. Stamatakis}, - title = {RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies}, - year = {2014}, - url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html} - } - 10.1371/journal.pone.0009490 + + @unpublished{None, + author = {Eric Wafula}, + title = {None}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes} + } + + 10.1093/bioinformatics/btw412 + + @published{Research in Computational Molecular Biology (RECOMB) (pp. 177–191), + author = {Mirarab, S., Nguyen, N., Warnow, T.}, + title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan + (Ed.)}, + year = {2014}, + url = {https://github.com/smirarab/pasta} + } + + + @published{Bioinformatics, + author = {Salvador Capella-Gutierrez, Jose M. Silla-Martinez, Toni + Gabaldon}, + title = {trimAl: a tool for automated alignment trimming in large-scale + phylogenetic analyses}, + year = {2009}, + url = {http://trimal.cgenomics.org} + } + + + @published{Bioinformatics, + author = {A. Stamatakis}, + title = {RAxML Version 8: A tool for Phylogenetic Analysis and + Post-Analysis of Large Phylogenies}, + year = {2014}, + url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html} + } + + 10.1371/journal.pone.0009490