<!--
view assembly_post_processor.xml @ 29:7af9033b21a2 draft

Uploaded
author greg
date Fri, 09 Dec 2016 08:28:49 -0500
parents
children
line wrap: on
line source
-->

<tool id="plant_tribes_assembly_post_processor" name="PlantTribes AssemblyPostProcessor" version="1.0.0">
    <description>pipeline</description>
    <!-- Package dependency declared by the tool: the package name is the element text, the version is an attribute. -->
    <requirements>
        <requirement version="0.2" type="package">plant_tribes_assembly_post_processor</requirement>
    </requirements>
    <!-- Job failure detection: exit codes first, then error markers in the tool output. -->
    <stdio>
        <!-- Any exit code of 1 or above marks the job as failed -->
        <exit_code range="1:" level="fatal" />
        <!-- Fallback for when the exit code was not set properly: look for error markers in stdout and stderr -->
        <regex match="Error:" source="both" level="fatal" />
        <regex match="Exception:" source="both" level="fatal" />
    </stdio>
    <!--
        Command line for the AssemblyPostProcesser pipeline, written as a Cheetah template.
        Quoted parameter values use single quotes so the shell performs no expansion on them,
        and parameters that live inside a conditional are referenced through that conditional's name.
        The transcripts, the prediction method and the thread count are always passed;
        the score matrices are passed only for the estscan method, and the remaining flags
        are emitted only when the advanced options configuration is selected.
    -->
    <command>
        <![CDATA[
            AssemblyPostProcesser
            --transcripts '$input'
            --prediction_method $prediction_method_cond.prediction_method
            #if str($prediction_method_cond.prediction_method) == 'estscan':
                --score_matrices '$prediction_method_cond.score_matrices'
            #end if
            #if str($options_type.options_type_selector) == 'advanced':
                #if str($options_type.target_gene_family_assembly_cond.target_gene_family_assembly) == 'yes':
                    --gene_family_search '$options_type.target_gene_family_assembly_cond.orthogroups'
                    --scaffold '$options_type.target_gene_family_assembly_cond.scaffold'
                    --method '$options_type.target_gene_family_assembly_cond.method'
                #end if
                --gap_trimming $options_type.gap_trimming
                #if str($options_type.strand_specific) == 'yes':
                    --strand_specific
                #end if
                #if str($options_type.dereplicate) == 'yes':
                    --dereplicate
                #end if
                --min_length $options_type.min_length
            #end if
            --num_threads \${GALAXY_SLOTS:-4}
        ]]>
    </command>
    <!-- Tool parameters. Each conditional exposes its nested parameters only for the matching selector value. -->
    <inputs>
        <param name="input" type="data" format="fasta" label="De novo transcriptome assembly fasta file"/>
        <!-- Coding region prediction method; only the estscan branch asks for a scores matrices dataset. -->
        <conditional name="prediction_method_cond">
            <param name="prediction_method" type="select" label="Prediction method for coding regions">
                <option selected="true" value="transdecoder">TransDecoder</option>
                <option value="estscan">ESTScan</option>
            </param>
            <when value="transdecoder"/>
            <when value="estscan">
                <param name="score_matrices" type="data" format="smat" label="Scores matrices"/>
            </when>
        </conditional>
        <!-- Basic exposes no further parameters; advanced exposes everything below. -->
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option selected="true" value="basic">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic"/>
            <when value="advanced">
                <!-- Optional targeted gene family assembly: orthogroup list, scaffold and clustering method. -->
                <conditional name="target_gene_family_assembly_cond">
                    <param name="target_gene_family_assembly" type="select" label="Target gene family assembly?">
                        <option selected="true" value="no">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no"/>
                    <when value="yes">
                        <param name="orthogroups" type="data" format="tabular" label="List of orthogroup identifiers for target gene families to assemble"/>
                        <!-- Choices come from the plant_tribes_scaffolds data table; validation fails when it has no entries. -->
                        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
                            <options from_data_table="plant_tribes_scaffolds"/>
                            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
                        </param>
                        <param name="method" type="select" label="Protein clustering method">
                            <option selected="true" value="gfam">GFam</option>
                            <option value="orthofinder">OrthoFinder</option>
                            <option value="orthomcl">OrthoMCL</option>
                        </param>
                    </when>
                </conditional>
                <param name="gap_trimming" type="float" min="0" max="1.0" value="0.1" label="Remove gappy sites in alignments" help="0.1 removes sites with 90% gaps"/>
                <param name="strand_specific" type="select" label="De novo transcriptome assembly was performed with strand-specific library?">
                    <option selected="true" value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <param name="dereplicate" type="select" label="Remove repeated sequences in predicted coding regions?">
                    <option selected="true" value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <param name="min_length" type="integer" value="200" label="Minimum sequence length of predicted coding regions"/>
            </when>
        </conditional>
    </inputs>
    <!-- Datasets discovered in assemblyPostProcessing_dir are added as hidden fasta elements of the "transcripts" list collection. -->
    <outputs>
        <collection type="list" name="transcripts">
            <discover_datasets directory="assemblyPostProcessing_dir" pattern="__name__" ext="fasta" visible="false"/>
        </collection>
    </outputs>
    <!-- Functional test: transdecoder prediction with the advanced options, strand-specific and dereplicate both enabled. -->
    <tests>
        <test>
            <param name="input" ftype="fasta" value="assembly.fasta"/>
            <param name="prediction_method" value="transdecoder"/>
            <param name="options_type_selector" value="advanced"/>
            <param name="strand_specific" value="yes"/>
            <param name="dereplicate" value="yes"/>
            <!-- Expected elements of the discovered "transcripts" collection -->
            <output_collection type="list" name="transcripts">
                <element name="transcripts.cds" ftype="fasta" file="transcripts.cds"/>
                <element name="transcripts.cleaned.cds" ftype="fasta" file="transcripts.cleaned.cds"/>
                <element name="transcripts.cleaned.nr.cds" ftype="fasta" file="transcripts.cleaned.nr.cds"/>
                <element name="transcripts.cleaned.nr.pep" ftype="fasta" file="transcripts.cleaned.nr.pep"/>
                <element name="transcripts.cleaned.pep" ftype="fasta" file="transcripts.cleaned.pep"/>
                <element name="transcripts.pep" ftype="fasta" file="transcripts.pep"/>
            </output_collection>
        </test>
    </tests>
    <help>
This tool is part of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It post-processes de novo assembly
transcripts into putative coding sequences and their corresponding amino acid translations.

-----

**Options**

 * **Prediction method for coding regions** - The prediction method for coding regions; one of ESTScan or TransDecoder.
 * **Scores matrices** - Scores matrices that reflect the codon preferences of the studied organisms.
 * **Target gene family assembly?** - Select yes to target gene families to assemble.
 * **List of orthogroup identifiers for target gene families to assemble** - History item with a list of orthogroup identifiers for target gene families to assemble.  The next version of this tool will use a dynamic select list for this feature.
 * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
 * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
 * **Remove gappy sites in alignments** - Removes gappy sites in alignments (e.g., 0.1 removes sites with 90% gaps); valid values range from 0.0 to 1.0.
 * **De novo transcriptome assembly was performed with strand-specific library?** - Select yes if de novo transcriptome assembly was performed with strand-specific library.
 * **Remove repeated sequences in predicted coding regions?** - Select yes to remove repeated sequences in predicted coding regions.
 * **Minimum sequence length of predicted coding regions** - Set the minimum sequence length of predicted coding regions.
 
    </help>
    <!--
        Citation shown for this tool. Replaces a placeholder entry whose fields were all "None".
        NOTE(review): the entry below is the PlantTribes resource paper; confirm the bibliographic
        details and that this is the reference the authors want cited before release.
    -->
    <citations>
        <citation type="bibtex">
            @article{Wall2008,
                author = {Wall, P. Kerr and Leebens-Mack, Jim and M{\"u}ller, Kai F. and Field, Dawn and Altman, Naomi S. and dePamphilis, Claude W.},
                title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
                journal = {Nucleic Acids Research},
                year = {2008},
                volume = {36},
                number = {suppl 1},
                pages = {D970--D976}
            }
        </citation>
    </citations>
</tool>