diff transcriptsToOrfs.xml @ 2:5eb99d21ef0d

Add trinityrnaseq_norm and transcriptsToOrfs tools
author Jim Johnson <jj@umn.edu>
date Thu, 05 Sep 2013 08:08:21 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/transcriptsToOrfs.xml	Thu Sep 05 08:08:21 2013 -0500
@@ -0,0 +1,117 @@
+<tool id="transcriptsToOrfs" name="transcriptsToOrfs" version="0.0.2">
+    <description>Trinity Transcripts to Candidate Peptides</description>
+    <requirements>
+        <requirement type="package" version="2013_08_14">trinityrnaseq</requirement>
+        <requirement type="package" version="3.0">hmmer</requirement>
+    </requirements>
+    <command>
+        \$TRINITY_HOME/trinity-plugins/transdecoder/transcripts_to_best_scoring_ORFs.pl
+        -t "$transcripts"
+        #if $min_prot_length:
+            -m $min_prot_length
+        #end if
+        #if $retain_long_orfs:
+            --retain_long_orfs $retain_long_orfs
+        #end if
+        #if $training_count:
+            -T $training_count
+        #end if
+        #if str($strand_specificity) == 'SS':
+            -S
+        #end if
+        #if str($genetic_code) != '':
+            -G $genetic_code
+        #end if
+        #if str($search.use_pfam) == 'yes':
+          --search_pfam "${ filter( lambda x: str( x[0] ) == str( $search.pfam_db ), $__app__.tool_data_tables[ 'pfam_databases' ].get_fields() )[0][-1] }"
+          --CPU $search.CPU
+        #end if
+    </command> <!-- Command template: runs transcripts_to_best_scoring_ORFs.pl on the input transcripts; each optional flag is emitted only when its parameter is set, and select values are compared via str(). -->
+    <inputs> <!-- User-facing parameters; the three integer options are optional and each must be at least 50 when given. -->
+        <param name="transcripts" type="data" format="fasta" label="Transcripts sequences in fastA format" help=""/>
+        <param name="min_prot_length" type="integer" optional="true" value="" label="Minimum peptide length (in amino acids)" help="default: 100">
+            <validator type="in_range" min="50" message="Minimum peptide length should be at least 50"/>
+        </param>
+        <param name="retain_long_orfs" type="integer" optional="true" value="" label="Retain all ORFs found that are of minimum length in nucleotides" help="default: 900">
+            <validator type="in_range" min="50" message="ORF length should be at least 50"/>
+        </param>
+        <param name="training_count" type="integer" optional="true" value="" label="Number of top longest ORFs to train Markov Model (hexamer stats)" help="default: 500">
+            <validator type="in_range" min="50" message="ORF count should be at least 50"/>
+        </param>
+        <param name="strand_specificity" type="select" label="Strand specificity type">
+            <option value="DS">NOT strand specific, examine both strands</option>
+            <option value="SS">Strand specific, examine only top strand</option>
+        </param>
+        <param name="genetic_code" type="select" label="Genetic Code">
+            <option value="">use default(universal)</option>
+            <option value="universal">universal</option>
+            <option value="Euplotes">Euplotes</option>
+            <option value="Tetrahymena">Tetrahymena</option>
+            <option value="Candida">Candida</option>
+            <option value="Acetabularia">Acetabularia</option>
+        </param>
+        <conditional name="search">
+            <param name="use_pfam" type="select" label="Search PFAM database">
+                <option value="no">NO</option>
+                <option value="yes">YES</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="pfam_db" type="select" label="Pfam database">
+                    <options from_data_table="pfam_databases"/>
+                </param>
+                <param name="CPU" type="integer" min="1" value="2" label="CPU" help="Number of CPUs to use by hmmscan"/>
+            </when>
+        </conditional>
+    </inputs>
+    <stdio> <!-- Fatal conditions: any exit code in the range "1:" or stderr output matching the pattern below. -->
+        <exit_code range="1:" level="fatal" description="Failed"/>
+        <regex match="Error"
+               source="stderr"
+               level="fatal"
+               description="Failed"/>
+    </stdio>
+    <outputs> <!-- Each dataset is picked up from the job working directory by file name; the Pfam table is produced only when the Pfam search is enabled. -->
+        <data format="txt" name="trinity_pep_pfam" label="${tool.name} on ${on_string}: Pfam matches to Candidate Peptide Sequences" from_work_dir="longest_orfs.pep.pfam.dat">
+          <filter>search['use_pfam'] == 'yes'</filter>
+        </data>
+        <data format="gff3" name="trinity_pep_gff3" label="${tool.name} on ${on_string}: Candidate Peptide Features" from_work_dir="best_candidates.eclipsed_orfs_removed.gff3" />
+        <data format="bed" name="trinity_pep_bed" label="${tool.name} on ${on_string}: Candidate Peptide Coordinates" from_work_dir="best_candidates.eclipsed_orfs_removed.bed" />
+        <data format="fasta" name="trinity_pep_cds" label="${tool.name} on ${on_string}: Candidate Peptide CDS Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.cds"/>
+        <data format="fasta" name="trinity_pep_seqs" label="${tool.name} on ${on_string}: Candidate Peptide Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.pep"/>
+    </outputs>
+    <tests>
+        <test> <!-- Runs with the Pfam search disabled and checks that each of the four asserted outputs contains the given text. -->
+            <param name="transcripts" ftype="fasta" value="TrinitySingle.fasta"/>
+            <param name="min_prot_length" value="100"/>
+            <param name="use_pfam" value="no"/>
+            <output name="trinity_pep_seqs">
+                <assert_contents>
+                    <has_text text="WAAKAWLITARSLYPADF" />
+                </assert_contents>
+            </output>
+            <output name="trinity_pep_cds">
+                <assert_contents>
+                    <has_text text="TGGGCAGCCAAGGCATGGCTGATCACGGCCCGCA" />
+                </assert_contents>
+            </output>
+            <output name="trinity_pep_bed">
+                <assert_contents>
+                    <has_text text="comp10_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="trinity_pep_gff3">
+                <assert_contents>
+                    <has_text text="comp10_c0_seq1" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+        **transcriptsToOrfs**
+        Trinity_ is a de novo transcript assembler that uses RNA-seq data as input. 
+        This tool searches for open reading frames in the assembled transcripts.
+        
+        .. _Trinity: http://trinityrnaseq.sourceforge.net
+    </help>
+</tool>