Mercurial > repos > jjohnson > trinityrnaseq
diff transcriptsToOrfs.xml @ 2:5eb99d21ef0d
Add trinityrnaseq_norm and transcriptsToOrfs tools
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 05 Sep 2013 08:08:21 -0500 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper around the transdecoder plugin script
    transcripts_to_best_scoring_ORFs.pl shipped under $TRINITY_HOME.
    It takes a FASTA file of transcripts and collects the candidate
    peptide outputs (GFF3, BED, CDS, peptide FASTA and, optionally,
    Pfam matches) that the script leaves in the job working directory.
-->
<tool id="transcriptsToOrfs" name="transcriptsToOrfs" version="0.0.2">
    <description>Trinity Transcripts to Candidate Peptides</description>

    <!-- Package dependencies declared for this tool. -->
    <requirements>
        <requirement type="package" version="2013_08_14">trinityrnaseq</requirement>
        <requirement type="package" version="3.0">hmmer</requirement>
    </requirements>

    <!--
        Cheetah command template.
        * The leading backslash keeps $TRINITY_HOME away from Cheetah so the
          shell expands it at run time.
        * Each optional flag is emitted only when its parameter has a value;
          otherwise the script falls back to its own default (see the help
          text of the corresponding input).
        * The Pfam database location is the last column of the row in the
          'pfam_databases' data table whose first column equals the selected
          value. filter() is wrapped in list() because on Python 3 filter()
          returns an iterator, which cannot be indexed.
    -->
    <command>
        \$TRINITY_HOME/trinity-plugins/transdecoder/transcripts_to_best_scoring_ORFs.pl
        -t $transcripts
        #if $min_prot_length:
            -m $min_prot_length
        #end if
        #if $retain_long_orfs:
            --retain_long_orfs $retain_long_orfs
        #end if
        #if $training_count:
            -T $training_count
        #end if
        #if str($strand_specificity) == 'SS':
            -S
        #end if
        #if $genetic_code.__str__ != '':
            -G $genetic_code
        #end if
        #if $search.use_pfam == 'yes':
            --search_pfam "${ list( filter( lambda x: str( x[0] ) == str( $search.pfam_db ), $__app__.tool_data_tables[ 'pfam_databases' ].get_fields() ) )[0][-1] }"
            --CPU $search.CPU
        #end if
    </command>

    <inputs>
        <param format="fasta" name="transcripts" type="data" label="Transcripts sequences in fastA format" help="" />

        <!-- Optional numeric tuning parameters; each one is validated to be at least 50 when supplied. -->
        <param name="min_prot_length" type="integer" value="" optional="true" label="Minimum peptide length (in amino acids)" help="default: 100">
            <validator type="in_range" message="Minimum peptide length should be at least 50" min="50" />
        </param>
        <param name="retain_long_orfs" type="integer" value="" optional="true" label="Retain all ORFs found that are of minimum length in nucleotides" help="default: 900">
            <validator type="in_range" message="ORF length should be at least 50" min="50" />
        </param>
        <param name="training_count" type="integer" value="" optional="true" label="Number of top longest ORFs to train Markov Model (hexamer stats)" help="default: 500">
            <validator type="in_range" message="ORF count should be at least 50" min="50" />
        </param>

        <!-- Only the value "SS" adds a flag to the command line. -->
        <param name="strand_specificity" type="select" label="Strand specificity type">
            <option value="DS">NOT strand specific, examine both strands</option>
            <option value="SS">Strand specific, examine only top strand</option>
        </param>

        <!-- The empty value omits the genetic code flag from the command line. -->
        <param name="genetic_code" type="select" label="Genetic Code">
            <option value="">use default(universal)</option>
            <option value="universal">universal</option>
            <option value="Euplotes">Euplotes</option>
            <option value="Tetrahymena">Tetrahymena</option>
            <option value="Candida">Candida</option>
            <option value="Acetabularia">Acetabularia</option>
        </param>

        <!-- Pfam search is opt-in; the database and CPU count are only asked for when it is enabled. -->
        <conditional name="search">
            <param name="use_pfam" type="select" label="Search PFAM database">
                <option value="no">NO</option>
                <option value="yes">YES</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="pfam_db" type="select" label="Pfam database">
                    <options from_data_table="pfam_databases" />
                </param>
                <param name="CPU" type="integer" value="2" min="1" label="CPU" help="Number of CPUs to use by hmmscan" />
            </when>
        </conditional>
    </inputs>

    <!-- The job fails on any exit code of 1 or higher, or when stderr contains "Error". -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Failed" />
        <regex match="Error"
               source="stderr"
               level="fatal"
               description="Failed" />
    </stdio>

    <!-- Every output is picked up from a fixed file name in the job working directory. -->
    <outputs>
        <!-- Created only when the Pfam search is enabled. -->
        <data format="txt" name="trinity_pep_pfam" label="${tool.name} on ${on_string}: Pfam matches to Candidate Peptide Sequences" from_work_dir="longest_orfs.pep.pfam.dat">
            <filter>search['use_pfam'] == 'yes'</filter>
        </data>
        <data format="gff3" name="trinity_pep_gff3" label="${tool.name} on ${on_string} Candidate Peptide Features" from_work_dir="best_candidates.eclipsed_orfs_removed.gff3" />
        <data format="bed" name="trinity_pep_bed" label="${tool.name} on ${on_string} Candidate Peptide Coordinates" from_work_dir="best_candidates.eclipsed_orfs_removed.bed" />
        <data format="fasta" name="trinity_pep_cds" label="${tool.name} on ${on_string}: Candidate Peptide CDS Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.cds"/>
        <data format="fasta" name="trinity_pep_seqs" label="${tool.name} on ${on_string}: Candidate Peptide Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.pep"/>
    </outputs>

    <tests>
        <!-- Runs without the Pfam search and checks each of the four unconditional outputs for a known substring. -->
        <test>
            <param name="transcripts" ftype="fasta" value="TrinitySingle.fasta"/>
            <param name="min_prot_length" value="100"/>
            <!-- Pipe-qualified because use_pfam is declared inside the "search" conditional. -->
            <param name="search|use_pfam" value="no"/>
            <output name="trinity_pep_seqs">
                <assert_contents>
                    <has_text text="WAAKAWLITARSLYPADF" />
                </assert_contents>
            </output>
            <output name="trinity_pep_cds">
                <assert_contents>
                    <has_text text="TGGGCAGCCAAGGCATGGCTGATCACGGCCCGCA" />
                </assert_contents>
            </output>
            <output name="trinity_pep_bed">
                <assert_contents>
                    <has_text text="comp10_c0_seq1" />
                </assert_contents>
            </output>
            <output name="trinity_pep_gff3">
                <assert_contents>
                    <has_text text="comp10_c0_seq1" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
**transcriptsToOrfs**

Trinity_ is a de novo transcript assembler that uses RNA-seq data as input.
This tool searches for open reading frames in the assembled transcripts.

.. _Trinity: http://trinityrnaseq.sourceforge.net
    </help>
</tool>