view assembly_post_processor.xml @ 12:622603f41432 draft

Uploaded
author greg
date Tue, 31 Jan 2017 13:46:34 -0500
parents ef522c398ad4
children 521d09ac701a
line wrap: on
line source

<tool id="plant_tribes_assembly_post_processor" name="Postprocess de novo assembly transcripts" version="0.3">
    <description>into putative coding sequences</description>
    <!-- Package dependency of this wrapper; the requested package version (0.3) equals the tool version. -->
    <requirements>
        <requirement version="0.3" type="package">plant_tribes_assembly_post_processor</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit status, negative or positive, marks the job as failed. -->
        <exit_code range=":-1"/>
        <exit_code range="1:"/>
        <!-- The exit status may not have been set properly, so error markers in the job output are checked as well. -->
        <regex match="Error:"/>
        <regex match="Exception:"/>
    </stdio>
    <!--
        Cheetah template that assembles the AssemblyPostProcesser command line.
        * The transcripts dataset and the prediction method are always passed.
        * The score matrices dataset is passed only when ESTScan is the prediction method.
        * When the advanced options are selected, the gap trimming and minimum length values are
          always passed, the strand specific and dereplicate switches are passed only when set to
          'yes', and the target gene family arguments are passed only when that sub-option is 'yes'.
        * The thread count comes from GALAXY_SLOTS and falls back to 4.
        Values that come from datasets or from the scaffolds data table are single-quoted so that
        paths containing spaces reach the program as one argument.
    -->
    <command>
        <![CDATA[
            #import os
            AssemblyPostProcesser
            --transcripts '$input'
            --prediction_method $prediction_method_cond.prediction_method
            #if str($prediction_method_cond.prediction_method) == 'estscan':
                ## The dataset lives inside prediction_method_cond, so reference it fully qualified.
                --score_matrices '$prediction_method_cond.score_matrices'
            #end if
            #if str($options_type.options_type_selector) == 'advanced':
                #if str($options_type.target_gene_family_assembly_cond.target_gene_family_assembly) == 'yes':
                    ## Read the "path" column of the selected plant_tribes_scaffolds data table entry,
                    ## then split it into the containing directory and the scaffold name.
                    #set scaffold_path = $options_type.target_gene_family_assembly_cond.scaffold.fields.path
                    #set scaffold_dir = $os.path.split($scaffold_path)[0]
                    #set scaffold = $os.path.split($scaffold_path)[1]
                    --gene_family_search '$options_type.target_gene_family_assembly_cond.orthogroups'
                    --scaffold_dir '$scaffold_dir'
                    --scaffold '$scaffold'
                    --method '$options_type.target_gene_family_assembly_cond.method'
                #end if
                --gap_trimming $options_type.gap_trimming
                #if str($options_type.strand_specific) == 'yes':
                    --strand_specific
                #end if
                #if str($options_type.dereplicate) == 'yes':
                    --dereplicate
                #end if
                --min_length $options_type.min_length
            #end if
            --num_threads \${GALAXY_SLOTS:-4}
        ]]>
    </command>
    <inputs>
        <!-- Transcriptome assembly to post-process. -->
        <param name="input" type="data" format="fasta" label="De novo transcriptome assembly fasta file" />

        <!-- Coding region predictor; only the ESTScan branch asks for a score matrices dataset. -->
        <conditional name="prediction_method_cond">
            <param name="prediction_method" type="select" label="Prediction method for coding regions">
                <option value="transdecoder" selected="true">TransDecoder</option>
                <option value="estscan">ESTScan</option>
            </param>
            <when value="transdecoder"/>
            <when value="estscan">
                <param name="score_matrices" type="data" format="smat" label="Scores matrices" />
            </when>
        </conditional>

        <!-- Basic runs use no extra parameters; the advanced branch exposes all remaining settings. -->
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic"/>
            <when value="advanced">
                <!-- Optional targeted gene family assembly: orthogroup list, scaffold and clustering method. -->
                <conditional name="target_gene_family_assembly_cond">
                    <param name="target_gene_family_assembly" type="select" label="Target gene family assembly?">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no"/>
                    <when value="yes">
                        <param name="orthogroups"
                               type="data"
                               format="tabular"
                               label="List of orthogroup identifiers for target gene families to assemble" />
                        <!-- Choices are read from the plant_tribes_scaffolds data table. -->
                        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
                            <options from_data_table="plant_tribes_scaffolds"/>
                            <validator type="no_options"
                                       message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
                        </param>
                        <param name="method" type="select" label="Protein clustering method">
                            <option value="gfam" selected="true">GFam</option>
                            <option value="orthofinder">OrthoFinder</option>
                            <option value="orthomcl">OrthoMCL</option>
                        </param>
                    </when>
                </conditional>
                <!-- Fraction between 0 and 1.0, default 0.1. -->
                <param name="gap_trimming"
                       type="float"
                       value="0.1"
                       min="0"
                       max="1.0"
                       label="Remove sites in alignments with gaps of"
                       help="0.1 removes sites with 90% gaps" />
                <param name="strand_specific"
                       type="select"
                       label="De novo transcriptome assembly was performed with strand-specific library?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <param name="dereplicate"
                       type="select"
                       label="Remove repeated sequences in predicted coding regions?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <param name="min_length"
                       type="integer"
                       value="200"
                       label="Minimum sequence length of predicted coding regions" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Files discovered in assemblyPostProcessing_dir are collected into one list of fasta datasets,
             each element identified by its file name (the __name__ pattern). -->
        <collection name="transcripts" type="list">
            <discover_datasets directory="assemblyPostProcessing_dir"
                               pattern="__name__"
                               ext="fasta"
                               visible="true" />
        </collection>
    </outputs>
    <tests>
        <!-- Basic options with TransDecoder: four collection elements, no compare attribute set. -->
        <test>
            <param name="input" ftype="fasta" value="assembly.fasta"/>
            <param name="prediction_method" value="transdecoder"/>
            <output_collection name="transcripts" type="list">
                <element name="transcripts.cds" ftype="fasta" file="transcripts.cds"/>
                <element name="transcripts.cleaned.cds" ftype="fasta" file="transcripts.cleaned.cds"/>
                <!-- Expected file for this element is specific to this test (pep1 here, pep2 in the advanced test). -->
                <element name="transcripts.cleaned.pep" ftype="fasta" file="transcripts.cleaned.pep1"/>
                <element name="transcripts.pep" ftype="fasta" file="transcripts.pep"/>
            </output_collection>
        </test>
        <!-- Advanced options with dereplication enabled: six collection elements, each checked with compare="contains". -->
        <test>
            <param name="input" ftype="fasta" value="assembly.fasta"/>
            <param name="prediction_method" value="transdecoder"/>
            <param name="options_type_selector" value="advanced"/>
            <param name="gap_trimming" value="0.1"/>
            <param name="dereplicate" value="yes"/>
            <param name="min_length" value="200"/>
            <output_collection name="transcripts" type="list">
                <element name="transcripts.cds" ftype="fasta" file="transcripts.cds" compare="contains"/>
                <element name="transcripts.cleaned.cds" ftype="fasta" file="transcripts.cleaned.cds" compare="contains"/>
                <element name="transcripts.cleaned.nr.cds" ftype="fasta" file="transcripts.cleaned.nr.cds" compare="contains"/>
                <element name="transcripts.cleaned.nr.pep" ftype="fasta" file="transcripts.cleaned.nr.pep" compare="contains"/>
                <element name="transcripts.cleaned.pep" ftype="fasta" file="transcripts.cleaned.pep2" compare="contains"/>
                <element name="transcripts.pep" ftype="fasta" file="transcripts.pep" compare="contains"/>
            </output_collection>
        </test>
    </tests>
    <help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It postprocesses de novo assembly
transcripts into putative coding sequences and their corresponding amino acid translations, locally assembling targeted gene families.

-----

**Options**

 * **Prediction method for coding regions** - The prediction method for coding regions; one of ESTScan or TransDecoder.
 * **Scores matrices** - Score matrices that reflect the codon preferences of the studied organisms.
 * **Target gene family assembly?** - Select 'Yes' to target gene families to assemble.
 * **List of orthogroup identifiers for target gene families to assemble** - History item with a list of orthogroup identifiers for target gene families to assemble.
 * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
 * **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters).
 * **Remove sites in alignments with gaps of** - Removes gappy sites in alignments (e.g. 0.1 removes sites with 90% gaps): [0.0 to 1.0]
 * **De novo transcriptome assembly was performed with strand-specific library?** - Select 'Yes' if the de novo transcriptome assembly was performed with a strand-specific library.
 * **Remove repeated sequences in predicted coding regions?** - Select 'Yes' to remove repeated sequences in predicted coding regions.
 * **Minimum sequence length of predicted coding regions** - Set the minimum sequence length of predicted coding regions.
 
    </help>
    <citations>
        <!-- PlantTribes is cited by its source repository; no publication year is given for it here. -->
        <citation type="bibtex">
            @misc{planttribes,
                author = {Wafula, Eric},
                title = {PlantTribes},
                url = {https://github.com/dePamphilis/PlantTribes}
            }</citation>
        <!-- ESTScan: a valid BibTeX entry type, a key without spaces, and authors separated by "and". -->
        <citation type="bibtex">
            @inproceedings{iseli1999estscan,
                author = {Iseli, C. and Jongeneel, C. V. and Bucher, P.},
                title = {ESTScan: a program for detecting, evaluating, and reconstructing potential coding regions in EST sequences},
                booktitle = {Proc Int Conf Intell Syst Mol Biol},
                year = {1999},
                url = {http://estscan.sourceforge.net/}
            }</citation>
        <citation type="doi">10.1038/nprot.2013.084</citation>
        <citation type="doi">10.1109/tcbb.2013.68</citation>
    </citations>
</tool>