changeset 19:a7f57cf408e8 draft

planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
author yating-l
date Fri, 12 Aug 2016 12:03:46 -0400
parents 1debdbe657cd
children 04e57f9ef873
files Group.py README.rst gff2Togff3.py readme.rst snap.xml tool_dependencies.xml
diffstat 6 files changed, 151 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Group.py	Fri Aug 12 12:03:46 2016 -0400
@@ -0,0 +1,59 @@
+from operator import itemgetter
+
+# Input: A group: a list that contains lines belonging to the same gene
+class Group:
+    """The rows of one gene, rewritten as GFF3 gene/mRNA/CDS records.
+
+    A group is a non-empty list of rows that belong to the same gene. Each
+    row is a list holding the tab-separated columns of one input line; the
+    class reads column 1 (sequence id), 2 (source), 3 (type), 4/5
+    (start/end), 7 (strand), 8 (phase) and 9 (gene name).
+    """
+
+    # Modify "type" column and "attributes" column, initialize id, gene, source, stream
+    def __init__(self, group):
+        """Record the gene-level fields and normalise every row.
+
+        group -- non-empty list of rows (lists of column strings).
+
+        Every row is copied before it is changed, so the caller's lists are
+        left as they were. In the copies the type column becomes "CDS", the
+        attributes column becomes "Parent=mRNA_<gene>", and start/end become
+        ints so that they sort and subtract numerically.
+
+        Raises ValueError if a start or end column is not an integer.
+        """
+        self.group = [list(row) for row in group]
+        first = self.group[0]
+        self.id = str(first[0])
+        self.source = str(first[1])
+        self.stream = str(first[6])
+        self.gene = str(first[8])
+        # Type of the first input row, saved because the loop below
+        # overwrites column 3; phaseCalculator consults it for single-row
+        # groups.
+        self.input_type = str(first[2])
+        parent = "Parent=mRNA_" + self.gene
+        for row in self.group:
+            row[2] = "CDS"
+            row[8] = parent
+            row[3] = int(row[3])
+            row[4] = int(row[4])
+
+    # Order the group elements according to stream, +: ascending order, -: descending order
+    def order(self):
+        """Sort the rows by start coordinate and compute the gene span.
+
+        Rows are sorted ascending for stream "+" and descending for "-".
+        Sets self.num (row count), self.min_item (smallest start) and
+        self.max_item (largest end).
+
+        Raises ValueError for any other stream value; without a valid
+        stream there is no span to write.
+        """
+        self.num = len(self.group)
+        if self.stream == "+":
+            descending = False
+        elif self.stream == "-":
+            descending = True
+        else:
+            raise ValueError("Stream is invalid: " + repr(self.stream))
+        self.group = sorted(self.group, key=itemgetter(3), reverse=descending)
+        # Take the extremes over all rows instead of trusting the first and
+        # last one, so nested or overlapping rows cannot shrink the span.
+        self.min_item = min(row[3] for row in self.group)
+        self.max_item = max(row[4] for row in self.group)
+
+    def phaseCalculator(self, i, donor = 0):
+        """Fill in the phase column (column 8) of rows i .. end.
+
+        i     -- index of the first row to process; order() must have run
+                 first because it sets self.num.
+        donor -- number of bases left over after the last complete codon of
+                 the previous row (0 when starting at the first row).
+
+        A single-row group gets phase 0, unless its input type was "Eterm",
+        in which case the phase is the row length modulo 3. In a multi-row
+        group each row's phase is (3 - donor) % 3 and the donor carried to
+        the next row is (length - phase) % 3.
+        """
+        if i >= self.num:
+            return
+        if self.num == 1:
+            row = self.group[i]
+            self.type = self.input_type
+            self.size = row[4] - row[3] + 1
+            if self.type == "Eterm":
+                row[7] = str(self.size % 3)
+            else:
+                row[7] = "0"
+            return
+        # Iterate rather than recurse so the number of rows is not limited
+        # by the interpreter's recursion depth.
+        for row in self.group[i:self.num]:
+            self.type = row[2]
+            self.size = row[4] - row[3] + 1
+            accept = (3 - donor) % 3
+            row[7] = str(accept)
+            donor = (self.size - accept) % 3
+
+    def writer(self, gff3):
+        """Write the gene line, the mRNA line and one line per row to gff3.
+
+        gff3 -- writable text file object.
+
+        The stored rows keep their integer coordinates (they are converted
+        to text only while writing), so the method can be called again.
+
+        Raises ValueError if the stream is neither "+" nor "-".
+        """
+        self.order()
+        self.phaseCalculator(0)
+        span = [str(self.min_item), str(self.max_item), ".", self.stream, "."]
+        gene_line = [self.id, self.source, "gene"] + span + ["ID=" + self.gene]
+        mrna_line = [self.id, self.source, "mRNA"] + span + ["ID=mRNA_" + self.gene + ";Parent=" + self.gene]
+        gff3.write("\t".join(gene_line) + "\n")
+        gff3.write("\t".join(mrna_line) + "\n")
+        for row in self.group:
+            gff3.write("\t".join(str(column) for column in row) + "\n")
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Fri Aug 12 12:03:46 2016 -0400
@@ -0,0 +1,46 @@
+Galaxy wrapper for SNAP
+========================
+
+This wrapper is copyright 2016-2017 by Yating Liu
+
+This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid
+Parser.
+
+Reference
+----------------------
+
+    Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59
+
+Installation
+-----------------------
+
+To install SNAP, please download SNAP from
+
+http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz
+
+and follow the installation instructions. The software is routinely compiled and tested on Mac OS X. It should compile
+fine on any Linux/Unix-type operating system.
+The default compiler is gcc. If you have gcc installed, the easiest way is to compile with:
+```
+  make
+```
+
+The ZOE environment variable is used by SNAP to find the HMM files. Set this
+to the directory in which SNAP was unpacked. For example, if you unpacked the tarball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap
+
+```
+    setenv ZOE /usr/local/snap # csh, tcsh, etc
+```
+  or
+```
+    export ZOE=/usr/local/snap # sh, bash, etc
+```
+To install the wrapper, copy the snap folder into the Galaxy tools directory and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example:
+```
+<tool file="galaxy/tools/myTools/snap/snap.xml" />
+```
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff2Togff3.py	Fri Aug 12 12:03:46 2016 -0400
@@ -0,0 +1,42 @@
+
+from Group import Group
+
+class Convertor:
+    """Convert a GFF2 file whose rows are grouped by gene name into GFF3.
+
+    Consecutive rows with the same value in column 9 are treated as one
+    gene and handed to Group, which writes the gene, mRNA and CDS lines.
+    """
+
+    def __init__(self, input, output):
+        """Load every row of the input and start the output file.
+
+        input  -- path of the tab-separated GFF2 file to read.
+        output -- path of the GFF3 file to create; it is opened for writing
+                  and the "##gff-version 3" header is written immediately.
+
+        Blank lines and lines starting with "#" are skipped because they
+        have no column 9 to group on.
+        """
+        with open(input) as handle:
+            self.li = [line.rstrip().split("\t") for line in handle
+                       if line.strip() and not line.startswith("#")]
+        self.gff3 = open(output, "w")
+        self.gff3.write("##gff-version 3\n")
+
+    def convert(self):
+        """Write every gene group to the output file, then close it.
+
+        The output file is closed even when a group raises, so the handle
+        is never left open.
+        """
+        try:
+            index = 0
+            total = len(self.li)
+            while index < total:
+                index = self.groupAsgene(index)
+        finally:
+            self.gff3.close()
+
+    def groupAsgene(self, start = 0):
+        """Write the group of rows beginning at `start`.
+
+        start -- index in self.li of the first row of the group.
+
+        The group extends over the following rows for as long as column 9
+        equals that of the first row. Returns the index of the first row
+        after the group (len(self.li) when the group reaches the end).
+        """
+        gene = self.li[start][8]
+        total = len(self.li)
+        index = start + 1
+        while index < total and self.li[index][8] == gene:
+            index += 1
+        Group(self.li[start:index]).writer(self.gff3)
+        return index
+
+   
+        
+
+if __name__ == "__main__":
+    import sys
+
+    # Optional command-line arguments: the input GFF2 path, then the output
+    # GFF3 path. When an argument is missing the original hard-coded file
+    # name is used.
+    args = sys.argv[1:]
+    gff2_path = args[0] if len(args) > 0 else "dbia3.gff"
+    gff3_path = args[1] if len(args) > 1 else "test.txt"
+    Convertor(gff2_path, gff3_path).convert()
+
+
+    
\ No newline at end of file
--- a/readme.rst	Fri Jul 22 11:28:51 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-Galaxy wrapper for SNAP
-========================
-
-This wrapper is copyright 2016-2017 by Yating Liu
-
-This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acroynm for Semi-HMM-based Nucleic Acid
-Parser.
-
-Reference
-----------------------
-
-    Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59
-
-Installation
------------------------
-
-To install SNAP, please download SNAP from
-
-http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz
-
-and follow the installation instractions. The software is routinely compiled and tested on Mac OS X. It should compile
-fine on any Linux/Unix type operating systems.
-The default compiler is gcc. If you have gcc installed, the easiest is to just compile as:
-```
-  make
-```
-
-The ZOE environment variable is used by SNAP to find the HMM files. Set this
-to the directory containing this file. For example, if you unpackaged the tar-ball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap
-
-```
-    setenv ZOE /usr/local/snap # csh, tcsh, etc
-```
-  or
-```
-    export ZOE=/usr/local/snap # sh, bash, etc
-```
-To install the wrapper copy the snap folder in the galaxy tools and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example:
-```
-<tool file="galaxy/tools/myTools/snap/snap.xml" />
-```
-
-
-
-
-
--- a/snap.xml	Fri Jul 22 11:28:51 2016 -0400
+++ b/snap.xml	Fri Aug 12 12:03:46 2016 -0400
@@ -1,6 +1,6 @@
-<tool id="snap" name="Semi-HMM-based Nucleic Acid Parser (SNAP)" version="1.0">
+<tool id="snap" name="Semi-HMM-based Nucleic Acid Parser (SNAP)" version="0.1.0">
     <requirements>
-        <requirement type="package" version="1.0">snap</requirement>
+	<requirement type="package" version="0.1.0">snap</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
--- a/tool_dependencies.xml	Fri Jul 22 11:28:51 2016 -0400
+++ b/tool_dependencies.xml	Fri Aug 12 12:03:46 2016 -0400
@@ -1,25 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="snap" version="1.0">
-        <install version="1.0">
-            <actions_group>
-                <actions>
-                    <action type="download_by_url">http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz</action>
-                    <action type="shell_command">make</action>
-                    <action type="move_directory_files">
-                        <source_directory>.</source_directory>
-                        <destination_directory>$INSTALL_DIR</destination_directory>
-                    </action>
-                    <action type="set_environment">
-                        <environment_variable name="ZOE" action="set_to">$INSTALL_DIR</environment_variable>
-                        <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
-                    </action>
-                </actions>
-            </actions_group>
-        </install>
-        <readme>SNAP is a general purpose gene finding program suitable for both eukaryotic
-            and prokaryotic genomes. SNAP is an acroynm for Semi-HMM-based Nucleic Acid
-            Parser.
-        </readme>
+    <package name="snap" version="0.1.0">
+        <repository changeset_revision="0bc612de916e" name="package_snap_0_1_0" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>