changeset 3:f4416f1a674a draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit cfb19d75629f02e0dea4475c16c016ed5510eb44
author bgruening
date Wed, 26 Jul 2017 10:13:35 -0400
parents 6d97da269ee2
children
files aragorn.xml tRNAscan.py tRNAscan.xml test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt tool_dependencies.xml trna_prediction.tar.gz
diffstat 6 files changed, 223 insertions(+), 132 deletions(-) [+]
line wrap: on
line diff
--- a/aragorn.xml	Thu Sep 17 16:49:26 2015 -0400
+++ b/aragorn.xml	Wed Jul 26 10:13:35 2017 -0400
@@ -1,25 +1,27 @@
-<tool id="aragorn_trna" name="tRNA and tmRNA" version="0.5">
+<tool id="aragorn_trna" name="tRNA and tmRNA" version="0.6">
     <description>prediction (Aragorn)</description>
     <requirements>
         <requirement type="package" version="1.2.36">aragorn</requirement>
+        <requirement type="package" version="2.7">python</requirement>
     </requirements>
-    <command>
-<![CDATA[
-        aragorn
-            $input
-            -gc$genbank_gencode
-            $tmRNA
-            $tRNA
-            $mtRNA
-            $mam_mtRNA
-            $topology
-            -o $output
-            $secondary_structure
-            $introns;
-
+    <command><![CDATA[
+        #if not $gff3_output:
+            aragorn
+                '$input'
+                -gc$genbank_gencode
+                $tmRNA
+                $tRNA
+                $mtRNA
+                $mam_mtRNA
+                $topology
+                -o '$output'
+                $secondary_structure
+                $introns
+        #end if
+   
         #if $gff3_output:
             aragorn
-                $input
+                '$input'
                 -gc$genbank_gencode
                 $tmRNA
                 $tRNA
@@ -28,14 +30,14 @@
                 $topology
                 $introns
                 -w
-                | python $__tool_directory__/aragorn_out_to_gff3.py $gff3_model > $gff3_output_file;
+            | python '$__tool_directory__/aragorn_out_to_gff3.py' $gff3_model > '$gff3_output_file'
         #end if
-]]>
+		]]>
     </command>
     <inputs>
         <param name="input" type="data" format="fasta" label="Genome Sequence"/>
         <param name="genbank_gencode" type="select" label="Genetic code">
-            <option value="1" select="True">1. Standard</option>
+            <option value="1" selected="true">1. Standard</option>
             <option value="2">2. Vertebrate Mitochondrial</option>
             <option value="3">3. Yeast Mitochondrial</option>
             <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
@@ -76,7 +78,7 @@
     <outputs>
         <data name="output" format="fasta">
             <change_format>
-               <when input="secondary_structure" value="true" format="text"/>
+               <when input="secondary_structure" value="-fasta" format="txt"/>
              </change_format>
         </data>
         <data format="gff3" name="gff3_output_file" >
@@ -93,9 +95,34 @@
             <param name="mtRNA" value="False" />
             <param name="mam_mtRNA" value="False" />
             <param name="introns" value="False" />
-            <param name="secondary_structure" value="False" />
+            <param name="secondary_structure" value="-fon" />
+            <param name="gff3_output" value="false" />
+            <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.fasta" ftype="fasta" />
+        </test>
+		
+        <test>
+            <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" />
+            <param name="genbank_gencode" value="1" />
+            <param name="topology" value="-c" />
+            <param name="tmRNA" value="True" />
+            <param name="tRNA" value="True" />
+            <param name="mtRNA" value="False" />
+            <param name="mam_mtRNA" value="False" />
+            <param name="introns" value="False" />
+            <param name="secondary_structure" value="-fasta" />
+            <param name="gff3_output" value="false" />
+            <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.txt" ftype="txt" lines_diff="2" />
+        </test>
+        <test>
+            <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" />
+            <param name="genbank_gencode" value="1" />
+            <param name="topology" value="-c" />
+            <param name="tmRNA" value="True" />
+            <param name="tRNA" value="True" />
+            <param name="mtRNA" value="False" />
+            <param name="mam_mtRNA" value="False" />
+            <param name="introns" value="False" />
             <param name="gff3_output" value="True" />
-            <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.fasta" ftype="fasta" />
             <output name="gff3_output_file" file="aragorn_tansl-table-1_tmRNA_tRNA.gff3" ftype="gff3" />
         </test>
         <test>
--- a/tRNAscan.py	Thu Sep 17 16:49:26 2015 -0400
+++ b/tRNAscan.py	Wed Jul 26 10:13:35 2017 -0400
@@ -8,13 +8,12 @@
 from Bio.SeqRecord import SeqRecord
 import subprocess
 
-
 def main(args):
     """
         Call from galaxy:
         tRNAscan.py $organism $mode $showPrimSecondOpt $disablePseudo $showCodons $tabular_output $inputfile $fasta_output
 
-            tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -d -Q -y -q -b -o $tabular_output $inputfile;
+            tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -Q -y -q -b -o $tabular_output $inputfile;
     """
     cmd = """tRNAscan-SE -Q -y -q -b %s""" % ' '.join( args[:-1] )
     child = subprocess.Popen(cmd.split(),
--- a/tRNAscan.xml	Thu Sep 17 16:49:26 2015 -0400
+++ b/tRNAscan.xml	Wed Jul 26 10:13:35 2017 -0400
@@ -1,38 +1,50 @@
-<tool id="trnascan" name="tRNA prediction" version="0.3">
+<tool id="trnascan" name="tRNA prediction" version="0.4">
     <description>(tRNAscan)</description>
     <requirements>
-        <requirement type="package" version="1.3.1">tRNAscan-SE</requirement>
-        <requirement type="package" version="1.61">biopython</requirement>
+      <requirement type="package" version="1.3.1">trnascan-se</requirement>
+      <requirement type="package" version="1.0.2">infernal</requirement>
+      <requirement type="package" version="1.70">biopython</requirement>
+      <requirement type="package" version="2.7">python</requirement>
     </requirements>
-    <command interpreter="python">
-<![CDATA[
-        tRNAscan.py
-            $organism
-            $mode
-            $showPrimSecondOpt
-            $disablePseudo
-            $showCodons
-            -o
-            $tabular_output
-            $inputfile
-            $fasta_output
-]]>
+    <command>
+      <![CDATA[
+python '$__tool_directory__/tRNAscan.py'
+#if $organism
+    $organism
+#end if
+#if $mode
+    $mode
+#end if
+#if $showPrimSecondOpt
+    $showPrimSecondOpt
+#end if
+#if $disablePseudo
+    $disablePseudo
+#end if
+#if $showCodons
+    $showCodons
+#end if
+-o
+'$tabular_output'
+'$inputfile'
+'$fasta_output'
+      ]]>
     </command>
     <inputs>
         <param name="inputfile" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? See TIP below"/>
         <param name="organism" type="select" label="Select Organism">
-            <option value="">Eukaryotic</option>
+            <option value="" selected="true">Eukaryotic</option>
             <option value="-G">general tRNA model</option>
             <option value="-B">Bacterial</option>
             <option value="-A">Archaeal</option>
             <option value="-O">Mitochondrial/Chloroplast</option>
         </param>
         <param name="mode" type="select" label="Select Mode">
-            <option value="">Default</option>
+            <option value="" selected="true">Default</option>
             <option value="-C">Covariance model analysis only (slow)</option>
             <option value="-T">tRNAscan only</option>
             <option value="-E">EufindtRNA only</option>
-            <option value="--infernal">Infernal cm analysis (max sensitivity, very slow)</option>
+            <option value="--infernal">Infernal cm analysis (max sensitivity, very slow)</option> 
             <option value="--newscan">Infernal and new cm models</option>
         </param>
         <param name="disablePseudo" type="boolean" label="Disable pseudogene checking" truevalue="-D" falsevalue="" />
@@ -45,14 +57,14 @@
     </outputs>
     <tests>
         <test>
-            <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" />
+            <param name="inputfile" value="trna_arabidopsis.fasta" ftype="fasta" />
             <param name="organism" value="" />
-            <param name="mode" value="--infernal" />
+            <param name="mode" value="--infernal" /> <!-- Infernal test not working due to cmsearch error-->
             <param name="disablePseudo" value="" />
             <param name="showPrimSecondOpt" value="" />
             <param name="showCodons" value="" />
             <output name="fasta_output" file="tRNAscan_eukaryotic_infernal.fasta" ftype="fasta" />
-            <output name="fasta_output" file="tRNAscan_eukaryotic_infernal.tabular" ftype="tabular" />
+            <output name="tabular_output" file="tRNAscan_eukaryotic_infernal.tabular" ftype="tabular" />
         </test>
     </tests>
     <help>
@@ -82,41 +94,41 @@
 
 - use general tRNA model:
 
-	This option selects the general tRNA covariance model that was trained
-	on tRNAs from all three phylogenetic domains (Archaea, Bacteria, and
-	Eukarya). This mode can be used when analyzing a mixed collection of
-	sequences from more than one phylogenetic domain, with only slight
-	loss of sensitivity and selectivity. The original publication
-	describing this program and tRNAscan-SE version 1.0 used this general
-	tRNA model exclusively. If you wish to compare scores to those found
-	in the paper or scans using v1.0, use this option. Use of this option
-	is compatible with all other search mode options described in this
-	section.
+    This option selects the general tRNA covariance model that was trained
+    on tRNAs from all three phylogenetic domains (Archaea, Bacteria, and
+    Eukarya). This mode can be used when analyzing a mixed collection of
+    sequences from more than one phylogenetic domain, with only slight
+    loss of sensitivity and selectivity. The original publication
+    describing this program and tRNAscan-SE version 1.0 used this general
+    tRNA model exclusively. If you wish to compare scores to those found
+    in the paper or scans using v1.0, use this option. Use of this option
+    is compatible with all other search mode options described in this
+    section.
 
 - search for bacterial tRNAs
 
-	This option selects the bacterial covariance model for tRNA analysis,
-	and loosens the search parameters for EufindtRNA to improve detection
-	of bacterial tRNAs. Use of this mode with bacterial sequences
-	will also improve bounds prediction of the 3' end (the terminal CAA
-	triplet).
+    This option selects the bacterial covariance model for tRNA analysis,
+    and loosens the search parameters for EufindtRNA to improve detection
+    of bacterial tRNAs. Use of this mode with bacterial sequences
+    will also improve bounds prediction of the 3' end (the terminal CCA
+    triplet).
 
 - search for archaeal tRNAs
 
-	This option selects an archaeal-specific covariance model for tRNA
-	analysis, as well as slightly loosening the EufindtRNA search
-	cutoffs.
+    This option selects an archaeal-specific covariance model for tRNA
+    analysis, as well as slightly loosening the EufindtRNA search
+    cutoffs.
 
 - search for organellar (mitochondrial/chloroplast) tRNAs
 
-	This parameter bypasses the fast first-pass scanners that are poor at
-	detecting organellar tRNAs and runs Cove analysis only. Since true
-	organellar tRNAs have been found to have Cove scores between 15 and 20
-	bits, the search cutoff is lowered from 20 to 15 bits. Also,
-	pseudogene checking is disabled since it is only applicable to
-	eukaryotic cytoplasmic tRNA pseudogenes. Since Cove-only mode is
-	used, searches will be very slow (see -C option below) relative to the
-	default mode.
+    This parameter bypasses the fast first-pass scanners that are poor at
+    detecting organellar tRNAs and runs Cove analysis only. Since true
+    organellar tRNAs have been found to have Cove scores between 15 and 20
+    bits, the search cutoff is lowered from 20 to 15 bits. Also,
+    pseudogene checking is disabled since it is only applicable to
+    eukaryotic cytoplasmic tRNA pseudogenes. Since Cove-only mode is
+    used, searches will be very slow (see -C option below) relative to the
+    default mode.
 
 
 
@@ -124,29 +136,29 @@
 
 - search using Cove analysis only (max sensitivity, slow)
 
-	Directs tRNAscan-SE to analyze sequences using Cove analysis only.
-	This option allows a slightly more sensitive search than the default
-	tRNAscan + EufindtRNA -> Cove mode, but is much slower (by approx. 250
-	to 3,000 fold). Output format and other program defaults are
-	otherwise identical to the normal analysis.
+    Directs tRNAscan-SE to analyze sequences using Cove analysis only.
+    This option allows a slightly more sensitive search than the default
+    tRNAscan + EufindtRNA -> Cove mode, but is much slower (by approx. 250
+    to 3,000 fold). Output format and other program defaults are
+    otherwise identical to the normal analysis.
 
 - search using Eukaryotic tRNA finder (EufindtRNA) only:
 
-	This option runs EufindtRNA alone to search for tRNAs. Since Cove is
-	not being used as a secondary filter to remove false positives, this
-	run mode defaults to "Normal" parameters which more closely
-	approximates the sensitivity and selectivity of the original algorithm
-	describe by Pavesi and colleagues.
+    This option runs EufindtRNA alone to search for tRNAs. Since Cove is
+    not being used as a secondary filter to remove false positives, this
+    run mode defaults to "Normal" parameters which more closely
+    approximates the sensitivity and selectivity of the original algorithm
+    described by Pavesi and colleagues.
 
 - search using tRNAscan only (defaults to strict search parameters)
 
-	Directs tRNAscan-SE to use only tRNAscan to analyze sequences. This
-	mode will cause tRNAscan to default to using "strict" parameters
-	(similar to tRNAscan version 1.3 operation). This mode of operation
-	is faster (about 3-5 times faster than default mode analysis), but
-	will result in approximately 0.2 to 0.6 false positive tRNAs per Mbp,
-	decreased sensitivity, and less reliable prediction of anticodons,
-	tRNA isotype, and introns.
+    Directs tRNAscan-SE to use only tRNAscan to analyze sequences. This
+    mode will cause tRNAscan to default to using "strict" parameters
+    (similar to tRNAscan version 1.3 operation). This mode of operation
+    is faster (about 3-5 times faster than default mode analysis), but
+    will result in approximately 0.2 to 0.6 false positive tRNAs per Mbp,
+    decreased sensitivity, and less reliable prediction of anticodons,
+    tRNA isotype, and introns.
 
 - search using Infernal cm analysis only (max sensitivity, very slow)
 
@@ -157,32 +169,32 @@
 
 **disable pseudogene checking**
 
-	Manually disable checking tRNAs for poor primary or secondary
-	structure scores often indicative of eukaryotic pseudogenes. This
-	will slightly speed the program and may be necessary for non-eukaryotic
-	sequences that are flagged as possible pseudogenes but are known to be
-	functional tRNAs.
+    Manually disable checking tRNAs for poor primary or secondary
+    structure scores often indicative of eukaryotic pseudogenes. This
+    will slightly speed the program and may be necessary for non-eukaryotic
+    sequences that are flagged as possible pseudogenes but are known to be
+    functional tRNAs.
 
 
 **Show both primary and secondary structure score components to covariance model bit scores**
 
-	This option displays the breakdown of the two components of the
-	covariance model bit score. Since tRNA pseudogenes often have one
-	very low component (good secondary structure but poor primary sequence
-	similarity to the tRNA model, or vice versa), this information may be
-	useful in deciding whether a low-scoring tRNA is likely to be a
-	pseudogene. The heuristic pseudogene detection filter uses this
-	information to flag possible pseudogenes -- use this option to see why
-	a hit is marked as a possible pseudogene. The user may wish to
-	examine score breakdowns from known tRNAs in the organism of interest
-	to get a frame of reference.
+    This option displays the breakdown of the two components of the
+    covariance model bit score. Since tRNA pseudogenes often have one
+    very low component (good secondary structure but poor primary sequence
+    similarity to the tRNA model, or vice versa), this information may be
+    useful in deciding whether a low-scoring tRNA is likely to be a
+    pseudogene. The heuristic pseudogene detection filter uses this
+    information to flag possible pseudogenes -- use this option to see why
+    a hit is marked as a possible pseudogene. The user may wish to
+    examine score breakdowns from known tRNAs in the organism of interest
+    to get a frame of reference.
 
 
 
 **Show codons instead of tRNA anticodons**
 
-	This option causes tRNAscan-SE to output a tRNA's corresponding codon
-	in place of its anticodon.
+    This option causes tRNAscan-SE to output a tRNA's corresponding codon
+    in place of its anticodon.
 
 
 
@@ -190,15 +202,15 @@
 
 **input**
 
-	>CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
-	GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
-	GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
-	TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
-	TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
-	GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
-	ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
-	AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
-	.....
+    >CELF22B7  C.aenorhabditis elegans (Bristol N2) cosmid F22B7
+    GATCCTTGTAGATTTTGAATTTGAAGTTTTTTCTCATTCCAAAACTCTGT
+    GATCTGAAATAAAATGTCTCAAAAAAATAGAAGAAAACATTGCTTTATAT
+    TTATCAGTTATGGTTTTCAAAATTTTCTGACATACCGTTTTGCTTCTTTT
+    TTTCTCATCTTCTTCAAATATCAATTGTGATAATCTGACTCCTAACAATC
+    GAATTTCTTTTCCTTTTTCTTTTTCCAACAACTCCAGTGAGAACTTTTGA
+    ATATCTTCAAGTGACTTCACCACATCAGAAGGTGTCAACGATCTTGTGAG
+    AACATCGAATGAAGATAATTTTAATTTTAGAGTTACAGTTTTTCCTCCGA
+    .....
 
 
 **output**
@@ -217,14 +229,9 @@
     ========     ======    =====     ======    ====    ==========    ======    ======    ==========    ==========
 
 
-
-
-
 ]]>
     </help>
-    
     <citations>
         <citation type="doi">10.1093/nar/25.5.0955</citation>
     </citations>
-    
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt	Wed Jul 26 10:13:35 2017 -0400
@@ -0,0 +1,70 @@
+------------------------------
+ARAGORN v1.2.36   Dean Laslett
+------------------------------
+
+Please reference the following paper if you use this
+program as part of any published research.
+
+Laslett, D. and Canback, B. (2004) ARAGORN, a
+program for the detection of transfer RNA and
+transfer-messenger RNA genes in nucleotide sequences.
+Nucleic Acids Research, 32;11-16.
+
+
+Searching for tRNA genes with no introns
+Searching for tmRNA genes
+Assuming circular topology, search wraps around ends
+Searching both strands
+Using standard genetic code
+
+
+gi|240255695:23036500-23037000 Arabidopsis thaliana chromosome 3, complete sequence
+501 nucleotides in sequence
+Mean G+C content = 43.1%
+ 
+1.
+ 
+ 
+ 
+               a
+             g-c
+             g-c
+             g+t
+             g-c
+             a-t
+             t-a
+             g-c     tt
+            t   gtccc  a
+     ta    a    !!!!!  g
+    a  ctcg     caggg  c
+   t   !!!!    a     tt
+   g   gagc     c
+    gta    g     g
+            c-gag
+            t-a
+            c-g
+            g-c
+            c-g
+           t   t
+           t   a
+            tgc
+ 
+ 
+ 
+    tRNA-Ala(tgc)
+    73 bases, %GC = 56.2
+    Sequence [381,453]
+ 
+
+
+>tRNA-Ala(tgc) [381,453]
+ggggatgtagctcatatggtagagcgctcgctttgcatgcgagaggcaca
+gggttcgattccctgcatctcca
+
+
+
+
+Number of tmRNA genes = 0
+
+
+Configuration: aragorn /tmp/tmpx1qAPk/files/000/dataset_3.dat -gc1 -m -t -c -o /tmp/tmpx1qAPk/files/000/dataset_4.dat -fasta 
--- a/tool_dependencies.xml	Thu Sep 17 16:49:26 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="biopython" version="1.61">
-        <repository changeset_revision="2f6c871cfa35" name="package_biopython_1_61" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="aragorn" version="1.2.36">
-        <repository changeset_revision="d561a0a9f601" name="package_aragorn_1_2_36" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="tRNAscan-SE" version="1.3.1">
-        <repository changeset_revision="b754416c3030" name="package_trnascan_1_3_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
Binary file trna_prediction.tar.gz has changed