view phylogenomics_analysis.xml @ 7:505c38cafb9c draft

Uploaded
author greg
date Thu, 05 Jan 2017 14:26:38 -0500
parents 21298684d201
children 9c43870f6c51
line wrap: on
line source

<tool id="plant_tribes_phylogenomics_analysis" name="PhylogenomicsAnalysis" version="1.0.0">
    <description>pipeline</description>
    <!-- Package dependency declared for this tool (name and pinned version). -->
    <requirements>
        <requirement version="0.2" type="package">plant_tribes_phylogenomics_analysis</requirement>
    </requirements>
    <stdio>
        <!-- Anything other than zero is an error: match positive and negative exit codes -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <!-- In case the return code has not been set properly, scan the tool output for error markers too -->
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <command>
        <![CDATA[
            ## Runs the PhylogenomicsAnalysis pipeline, then writes a listing of the
            ## produced files to the output dataset and moves the files themselves
            ## into that dataset's extra files directory.
            ##
            ## src_dir  - directory (relative to the job working directory) whose
            ##            contents are listed and moved once the pipeline finishes.
            ## dest_dir - extra files directory of the output dataset.
            #import os
            #set src_dir = $os.path.join('phylogenomicsAnalysis_dir', 'orthogroups_fasta')
            #set dest_dir = $output.extra_files_path
            mkdir -p "$dest_dir" &&
            PhylogenomicsAnalysis
            --orthogroup_faa "$orthogroup_faa.extra_files_path"
            --scaffold "$scaffold"
            --method $method
            #if str($options_type.options_type_selector) == 'advanced':
                ## Multiple sequence alignment options.
                #if str($options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments) == 'yes':
                    #set multiple_sequence_alignments_option_cond = $options_type.multiple_sequence_alignments_cond.multiple_sequence_alignments_option_cond
                    #set multiple_sequence_alignments_option = $multiple_sequence_alignments_option_cond.multiple_sequence_alignments_option
                    #set multiple_codon_alignments_cond = $options_type.multiple_sequence_alignments_cond.multiple_codon_alignments_cond
                    #set multiple_codon_alignments = $multiple_codon_alignments_cond.multiple_codon_alignments
                    #if str($multiple_sequence_alignments_option) == 'create_alignments':
                        --create_alignments
                    #else if str($multiple_sequence_alignments_option) == 'add_alignments':
                        --add_alignments
                    #else if str($multiple_sequence_alignments_option) == 'pasta_alignments':
                        --pasta_alignments
                        --pasta_iter_limit $multiple_sequence_alignments_option_cond.pasta_iter_limit
                    #end if
                    #if str($multiple_codon_alignments) == 'yes':
                        ## The nested coding-sequence conditional is only defined under the
                        ## 'yes' case, so it must not be dereferenced outside this branch.
                        #set use_corresponding_coding_sequences_cond = $multiple_codon_alignments_cond.use_corresponding_coding_sequences_cond
                        #set use_corresponding_coding_sequences = $use_corresponding_coding_sequences_cond.use_corresponding_coding_sequences
                        --codon_alignments
                        #if str($use_corresponding_coding_sequences) == 'yes':
                            --orthogroup_fna
                        #end if
                        --sequence_type $multiple_codon_alignments_cond.sequence_type
                    #end if
                #end if
                ## Phylogenetic tree options.
                #if str($options_type.phylogenetic_trees_cond.phylogenetic_trees) == 'yes':
                    #set tree_inference_cond = $options_type.phylogenetic_trees_cond.tree_inference_cond
                    #set tree_inference_method = $tree_inference_cond.tree_inference
                    --tree_inference $tree_inference_method
                    #if str($tree_inference_method) == 'raxml':
                        --rooting_order "$tree_inference_cond.rooting_order"
                        --bootstrap_replicates $tree_inference_cond.bootstrap_replicates
                    #end if
                    --max_orthogroup_size $options_type.phylogenetic_trees_cond.max_orthogroup_size
                    --min_orthogroup_size $options_type.phylogenetic_trees_cond.min_orthogroup_size
                #end if
                ## Alignment quality control options.
                #if str($options_type.msa_quality_control_options_cond.msa_quality_control_options) == 'yes':
                    #set msa_quality_control_options_cond = $options_type.msa_quality_control_options_cond
                    #set remove_gappy_sequences_cond = $msa_quality_control_options_cond.remove_gappy_sequences_cond
                    ## Test the selector parameter, not the conditional container itself.
                    #if str($remove_gappy_sequences_cond.remove_gappy_sequences) == 'yes':
                        #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond
                        --remove_sequences $remove_gappy_sequences_cond.remove_sequences
                        #if str($trim_type_cond.trim_type) == 'automated_trimming':
                            --automated_trimming
                        #else
                            --gap_trimming $trim_type_cond.gap_trimming
                        #end if
                    #end if
                #end if
            #end if
            --scaffold_dir "${GALAXY_DATA_INDEX_DIR}/plant_tribes/scaffolds"
            --num_threads \${GALAXY_SLOTS:-4}
            && echo "Sequence alignments and corresponding phylogenies: `ls $src_dir | wc -l` files" > "$output"
            && ls $src_dir >> "$output"
            && mv $src_dir/* "$dest_dir"
        ]]>
    </command>
    <inputs>
        <!-- Dataset whose extra files directory is handed to the pipeline as the orthogroup protein fasta input -->
        <param name="orthogroup_faa" format="txt" type="data" label="Select sequences classified into gene family clusters"/>
        <!-- Scaffold choices come from the plant_tribes_scaffolds data table -->
        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
            <options from_data_table="plant_tribes_scaffolds" />
            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
        </param>
        <param name="method" type="select" label="Protein clustering method">
            <option value="gfam" selected="true">GFam</option>
            <option value="orthofinder">OrthoFinder</option>
            <option value="orthomcl">OrthoMCL</option>
        </param>
        <!-- Basic adds no optional flags to the command line; Advanced exposes the three option groups below -->
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <!-- Option group 1: multiple sequence alignments -->
                <conditional name="multiple_sequence_alignments_cond">
                    <param name="multiple_sequence_alignments" type="select" label="Set multiple sequence alignments options?">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <conditional name="multiple_sequence_alignments_option_cond">
                            <param name="multiple_sequence_alignments_option" label="Select option" type="select" force_select="True">
                                <option value="create_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (MAFFT algorithm)</option>
                                <option value="add_alignments">Add unaligned orthogroup proteins to scaffold backbone multiple sequence alignments (MAFFT algorithm)</option>
                                <option value="pasta_alignments">Create orthogroup protein multiple sequence alignments including scaffold backbone proteins (PASTA algorithm)</option>
                            </param>
                            <when value="create_alignments"/>
                            <when value="add_alignments"/>
                            <when value="pasta_alignments">
                                <param name="pasta_iter_limit" type="integer" value="3" min="0" label="Maximum number of iterations that the PASTA algorithm will run"/>
                            </when>
                        </conditional>
                        <conditional name="multiple_codon_alignments_cond">
                            <param name="multiple_codon_alignments" label="Construct orthogroup multiple codon alignments?" type="select">
                                <option value="no" selected="true">No</option>
                                <option value="yes">Yes</option>
                            </param>
                            <when value="yes">
                                <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference">
                                    <option value="protein" selected="true">Amino acid based</option>
                                    <option value="dna">Nucleotide based</option>
                                </param>
                                <conditional name="use_corresponding_coding_sequences_cond">
                                    <param name="use_corresponding_coding_sequences" type="select" label="Use corresponding coding sequences if available?" help="Ignored if 'No' was selected for the 'Create corresponding coding sequences?' option in the GeneFamilyClassifier tool">
                                        <option value="no" selected="true">No</option>
                                        <option value="yes">Yes</option>
                                    </param>
                                    <when value="no" />
                                    <when value="yes" />
                                </conditional>
                            </when>
                            <when value="no"/>
                        </conditional>
                    </when>
                </conditional>
                <!-- Option group 2: phylogenetic trees -->
                <conditional name="phylogenetic_trees_cond">
                    <param name="phylogenetic_trees" type="select" label="Set phylogenetic trees options?">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <conditional name="tree_inference_cond">
                            <param name="tree_inference" type="select" label="Phylogenetic trees inference method">
                                <option value="raxml" selected="true">RAxML</option>
                                <option value="fasttree">FastTree</option>
                            </param>
                            <when value="raxml">
                                <param name="rooting_order" format="txt" type="data" label="Select file containing strings matching sequence identifiers of species for determining the most basal taxa in the orthogroups for rooting trees"/>
                                <param name="bootstrap_replicates" type="integer" value="100" min="0" label="Number of replicates for rapid bootstrap analysis and search for the best-scoring ML tree"/>
                            </when>
                            <when value="fasttree"/>
                        </conditional>
                        <param name="max_orthogroup_size" type="integer" value="100" min="0" label="Maximum number of sequences in orthogroup alignments"/>
                        <param name="min_orthogroup_size" type="integer" value="4" min="0" label="Minimum number of sequences in orthogroup alignments"/>
                    </when>
                </conditional>
                <!-- Option group 3: alignment quality control -->
                <conditional name="msa_quality_control_options_cond">
                    <param name="msa_quality_control_options" type="select" label="Set MSA quality control options?">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <conditional name="remove_gappy_sequences_cond">
                            <param name="remove_gappy_sequences" type="select" label="Remove gappy sequences in alignments?">
                                <option value="no" selected="true">No</option>
                                <option value="yes">Yes</option>
                            </param>
                            <when value="no"/>
                            <when value="yes">
                                <param name="remove_sequences" type="float" value="0.5" min="0" max="1" label="Remove sequences with gaps of" help="0.5 removes sequences with 50% gaps"/>
                                <conditional name="trim_type_cond">
                                    <param name="trim_type" type="select" label="Select process used for gap trimming">
                                        <option value="automated_trimming" selected="true">Trim alignments using trimAl's ML heuristic trimming approach</option>
                                        <option value="gap_trimming">Trim alignments by removing sites that exceed a gap threshold</option>
                                    </param>
                                    <when value="automated_trimming"/>
                                    <when value="gap_trimming">
                                        <param name="gap_trimming" type="float" value="0.1" min="0" max="1.0" label="Remove sites in alignments with gaps of" help="0.1 removes sites with 90% gaps"/>
                                    </when>
                                </conditional>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Text dataset that receives the file count and listing written by the command;
             the result files themselves are moved into this dataset's extra files directory. -->
        <data label="Sequence alignments and corresponding phylogenies on ${on_string}" format="txt" name="output"/>
    </outputs>
    <tests>
        <!-- NOTE(review): this test does not match the tool defined in this file. None of the
             parameters below (input, prediction_method, target_gene_family_assembly,
             strand_specific, dereplicate, min_length) is declared in the inputs section, and
             the tool declares a single data output named "output", not a collection named
             "orthos". It looks like a placeholder carried over from another tool; replace it
             with a test that sets orthogroup_faa, scaffold and method and checks "output"
             once suitable test data is available. -->
        <test>
            <param name="input" value="" ftype="fasta" />
            <param name="prediction_method" value="transdecoder"/>
            <param name="target_gene_family_assembly" value="no"/>
            <param name="strand_specific" value="yes"/>
            <param name="dereplicate" value="yes"/>
            <param name="min_length" value="200"/>
            <output_collection name="orthos" type="list">
            </output_collection>
        </test>
    </tests>
    <help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic
analyses, comparing and analyzing the sequences of single genes, or a small number of genes, as well as many other types of data.

-----

**Options**

 * **Select sequences classified into gene family clusters** - Dataset whose extra files directory is passed to the pipeline as the orthogroup protein fasta input.
 * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
 * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
 * **Options Configuration** - Basic passes no additional options to the pipeline; Advanced exposes the option groups below.
 * **Set multiple sequence alignments options?** - Select how orthogroup protein alignments are produced (create with MAFFT, add to the scaffold backbone alignments with MAFFT, or create with PASTA) and whether orthogroup multiple codon alignments are constructed.
 * **Set phylogenetic trees options?** - Select the tree inference method (RAxML or FastTree) and the maximum and minimum number of sequences in orthogroup alignments. RAxML additionally requires a rooting order file and a number of bootstrap replicates.
 * **Set MSA quality control options?** - Optionally remove gappy sequences from alignments and select the process used for gap trimming.
 
    </help>
    <citations>
        <!-- PlantTribes publication; replaces the placeholder entry whose fields were all "None" -->
        <citation type="bibtex">
            @article{Wall2008,
                author = {Wall, P. Kerr and Leebens-Mack, Jim and M{\"u}ller, Kai F. and Field, Dawn and Altman, Naomi S. and dePamphilis, Claude W.},
                title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
                journal = {Nucleic Acids Research},
                year = {2008},
                volume = {36},
                number = {suppl 1},
                pages = {D970--D976}
            }
        </citation>
    </citations>
</tool>