Mercurial > repos > greg > plant_tribes_assembly_post_processor
changeset 29:7af9033b21a2 draft
Uploaded
author | greg |
---|---|
date | Fri, 09 Dec 2016 08:28:49 -0500 |
parents | 6ef336f7eb86 |
children | 6dd02ba54da1 |
files | assembly_post_processor.xml |
diffstat | 1 files changed, 143 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_post_processor.xml Fri Dec 09 08:28:49 2016 -0500
@@ -0,0 +1,154 @@
+<tool id="plant_tribes_assembly_post_processor" name="PlantTribes AssemblyPostProcessor" version="1.0.0">
+    <!-- Galaxy wrapper: runs AssemblyPostProcesser on a transcriptome assembly and gathers
+         whatever it writes to assemblyPostProcessing_dir into a list collection. -->
+    <description>pipeline</description>
+    <requirements>
+        <requirement type="package" version="0.2">plant_tribes_assembly_post_processor</requirement>
+    </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <!-- In case the return code has not been set properly check stderr too -->
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
+    <command>
+        <![CDATA[
+            AssemblyPostProcesser
+            --transcripts "$input"
+            --prediction_method $prediction_method_cond.prediction_method
+            #if str($prediction_method_cond.prediction_method) == 'estscan':
+                ## score_matrices is declared inside prediction_method_cond, so address it
+                ## through the conditional like every other nested parameter in this template.
+                --score_matrices "$prediction_method_cond.score_matrices"
+            #end if
+            ## The options below are only emitted when the advanced configuration is selected.
+            #if str($options_type.options_type_selector) == 'advanced':
+                #if str($options_type.target_gene_family_assembly_cond.target_gene_family_assembly) == 'yes':
+                    --gene_family_search "$options_type.target_gene_family_assembly_cond.orthogroups"
+                    --scaffold "$options_type.target_gene_family_assembly_cond.scaffold"
+                    --method "$options_type.target_gene_family_assembly_cond.method"
+                #end if
+                --gap_trimming $options_type.gap_trimming
+                #if str($options_type.strand_specific) == 'yes':
+                    --strand_specific
+                #end if
+                #if str($options_type.dereplicate) == 'yes':
+                    --dereplicate
+                #end if
+                --min_length $options_type.min_length
+            #end if
+            ## The backslash keeps Cheetah from interpreting the variable, leaving the shell
+            ## to expand it; 4 threads are used when GALAXY_SLOTS is unset.
+            --num_threads \${GALAXY_SLOTS:-4}
+        ]]>
+    </command>
+    <inputs>
+        <param name="input" format="fasta" type="data" label="De novo transcriptome assembly fasta file"/>
+        <!-- The score matrices dataset is only requested for the ESTScan prediction method. -->
+        <conditional name="prediction_method_cond">
+            <param name="prediction_method" type="select" label="Prediction method for coding regions">
+                <option value="transdecoder" selected="true">TransDecoder</option>
+                <option value="estscan">ESTScan</option>
+            </param>
+            <when value="transdecoder" />
+            <when value="estscan">
+                <param name="score_matrices" format="smat" type="data" label="Scores matrices"/>
+            </when>
+        </conditional>
+        <!-- Basic mode passes no extra flags; advanced mode exposes the remaining options. -->
+        <conditional name="options_type">
+            <param name="options_type_selector" type="select" label="Options Configuration">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <conditional name="target_gene_family_assembly_cond">
+                    <param name="target_gene_family_assembly" type="select" label="Target gene family assembly?">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="orthogroups" format="tabular" type="data" label="List of orthogroup identifiers for target gene families to assemble"/>
+                        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
+                            <options from_data_table="plant_tribes_scaffolds" />
+                            <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool to install and populate the PlantTribes scaffolds data table."/>
+                        </param>
+                        <param name="method" type="select" label="Protein clustering method">
+                            <option value="gfam" selected="true">GFam</option>
+                            <option value="orthofinder">OrthoFinder</option>
+                            <option value="orthomcl">OrthoMCL</option>
+                        </param>
+                    </when>
+                </conditional>
+                <param name="gap_trimming" type="float" value="0.1" min="0" max="1.0" label="Remove gappy sites in alignments" help="0.1 removes sites with 90% gaps"/>
+                <param name="strand_specific" type="select" label="De novo transcriptome assembly was performed with strand-specific library?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <param name="dereplicate" type="select" label="Remove repeated sequences in predicted coding regions?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <param name="min_length" type="integer" value="200" label="Minimum sequence length of predicted coding regions"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Each file discovered in assemblyPostProcessing_dir becomes a hidden fasta element named after the file. -->
+        <collection name="transcripts" type="list">
+            <discover_datasets pattern="__name__" directory="assemblyPostProcessing_dir" visible="false" ext="fasta" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="assembly.fasta" ftype="fasta" />
+            <param name="prediction_method" value="transdecoder" />
+            <param name="options_type_selector" value="advanced" />
+            <param name="strand_specific" value="yes" />
+            <param name="dereplicate" value="yes" />
+            <output_collection name="transcripts" type="list">
+                <element name="transcripts.cds" file="transcripts.cds" ftype="fasta" />
+                <element name="transcripts.cleaned.cds" file="transcripts.cleaned.cds" ftype="fasta" />
+                <element name="transcripts.cleaned.nr.cds" file="transcripts.cleaned.nr.cds" ftype="fasta" />
+                <element name="transcripts.cleaned.nr.pep" file="transcripts.cleaned.nr.pep" ftype="fasta" />
+                <element name="transcripts.cleaned.pep" file="transcripts.cleaned.pep" ftype="fasta" />
+                <element name="transcripts.pep" file="transcripts.pep" ftype="fasta" />
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It post-processes de novo assembly
+transcripts into putative coding sequences and their corresponding amino acid translations.
+
+-----
+
+**Options**
+
+ * **Prediction method for coding regions** - The prediction method for coding regions; one of ESTScan or TransDecoder.
+ * **Scores matrices** - Scores matrices that reflect the codons preferences in the studied organisms.
+ * **Target gene family assembly?** - Select yes to target gene families to assemble.
+ * **List of orthogroup identifiers for target gene families to assemble** - History item with a list of orthogroup identifiers for target gene families to assemble. The next version of this tool will use a dynamic select list for this feature.
+ * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
+ * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
+ * **Remove gappy sites in alignments** - Removes gappy sites in alignments (i.e. 0.1 removes sites with 90% gaps): [0.0 to 1.0]
+ * **De novo transcriptome assembly was performed with strand-specific library?** - Select yes if de novo transcriptome assembly was performed with strand-specific library.
+ * **Remove repeated sequences in predicted coding regions?** - Select yes to remove repeated sequences in predicted coding regions.
+ * **Minimum sequence length of predicted coding regions** - Set the minimum sequence length of predicted coding regions.
+
+    </help>
+    <citations>
+        <!-- Placeholder entry: every BibTeX field is still None. TODO: replace with the real citation. -->
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {None},
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {None}
+            }</citation>
+    </citations>
+</tool>