Mercurial > repos > yating-l > snap
changeset 19:a7f57cf408e8 draft
planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
author | yating-l |
---|---|
date | Fri, 12 Aug 2016 12:03:46 -0400 |
parents | 1debdbe657cd |
children | 04e57f9ef873 |
files | Group.py README.rst gff2Togff3.py readme.rst snap.xml tool_dependencies.xml |
diffstat | 6 files changed, 151 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
# File: Group.py (new file in this changeset)
from operator import itemgetter


class Group:
    """Rows of one gene from a GFF2-style table, emitted as GFF3 records.

    ``group`` is a non-empty list of rows that all belong to the same gene.
    Each row is a list of at least nine string fields, used by index:
    0 = sequence id, 1 = source, 2 = feature type, 3 / 4 = start / end,
    6 = strand (called "stream" here), 7 = phase, 8 = gene name.
    """

    def __init__(self, group):
        """Normalise the rows in place and record the gene-level fields.

        The id, source, stream and gene name are taken from the first row.
        Every row then gets type "CDS", attribute "Parent=mRNA_<gene>" and
        integer start / end coordinates.

        Raises:
            IndexError: if ``group`` is empty or a row has fewer than nine
                fields.
            ValueError: if a start / end field is not an integer.
        """
        self.group = group
        self.num = len(group)
        first = group[0]
        self.id = str(first[0])
        self.source = str(first[1])
        self.stream = str(first[6])
        self.gene = str(first[8])
        # The loop below overwrites column 2, so keep the feature types as
        # supplied (input order).  phaseCalculator needs the original type to
        # recognise a gene that consists of a single "Eterm" exon.
        self.source_types = [row[2] for row in group]
        parent = "Parent=mRNA_" + self.gene
        for row in self.group:
            row[2] = "CDS"
            row[8] = parent
            row[3] = int(row[3])
            row[4] = int(row[4])

    def order(self):
        """Sort the rows along the strand and compute the gene extent.

        "+" sorts by start ascending, "-" by start descending.  Any other
        stream value prints a warning and leaves the row order untouched.
        ``min_item`` / ``max_item`` (smallest start, largest end) are always
        set so that ``writer`` can still emit the gene and mRNA records.
        """
        self.num = len(self.group)
        if self.stream == "+":
            self.group = sorted(self.group, key=itemgetter(3))
        elif self.stream == "-":
            self.group = sorted(self.group, key=itemgetter(3), reverse=True)
        else:
            print("Stream in invalid!\n")
        self.min_item = min(row[3] for row in self.group)
        self.max_item = max(row[4] for row in self.group)

    def phaseCalculator(self, i, donor=0):
        """Fill the phase column (index 7) for rows ``i`` to the end.

        Args:
            i: index of the first row to process; nothing happens when it is
                past the last row.
            donor: number of bases of an incomplete codon carried over from
                the row before ``i`` (0 for the first row).

        A single-row gene gets ``size % 3`` when its original type was
        "Eterm" and "0" otherwise.  For multi-row genes each row's phase is
        the number of bases needed to complete the previous row's codon.
        Implemented as a loop so that genes with very many exons cannot hit
        the interpreter's recursion limit.
        """
        total = len(self.group)
        if i >= total:
            return
        if total == 1:
            row = self.group[i]
            # self.type / self.size are kept as attributes for backward
            # compatibility with the earlier implementation.
            self.type = self.source_types[0]
            self.size = row[4] - row[3] + 1
            row[7] = str(self.size % 3) if self.type == "Eterm" else "0"
            return
        for idx in range(i, total):
            row = self.group[idx]
            self.type = row[2]
            self.size = row[4] - row[3] + 1
            accept = (3 - donor) % 3
            row[7] = str(accept)
            donor = (self.size - accept) % 3

    def writer(self, gff3):
        """Write the gene, mRNA and CDS records to ``gff3``.

        Args:
            gff3: any object with a ``write(str)`` method (e.g. an open text
                file).

        The rows are ordered and phased first.  Coordinates are converted to
        text only while writing, so the rows keep their integer coordinates
        and the method can be called more than once.
        """
        self.order()
        self.phaseCalculator(0)
        start = str(self.min_item)
        end = str(self.max_item)
        prefix = self.id + "\t" + self.source + "\t"
        span = "\t" + start + "\t" + end + "\t.\t" + self.stream + "\t.\t"
        gff3.write(prefix + "gene" + span + "ID=" + self.gene + "\n")
        gff3.write(
            prefix + "mRNA" + span
            + "ID=mRNA_" + self.gene + ";Parent=" + self.gene + "\n"
        )
        for row in self.group:
            gff3.write("\t".join(str(field) for field in row) + "\n")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Fri Aug 12 12:03:46 2016 -0400 @@ -0,0 +1,46 @@ +Galaxy wrapper for SNAP +======================== + +This wrapper is copyright 2016-2017 by Yating Liu + +This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid +Parser. + +Reference +---------------------- + + Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59 + +Installation +----------------------- + +To install SNAP, please download SNAP from + +http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz + +and follow the installation instructions. The software is routinely compiled and tested on Mac OS X. It should compile +fine on any Linux/Unix-type operating system. +The default compiler is gcc. If you have gcc installed, the easiest way is to just compile with: +``` + make +``` + +The ZOE environment variable is used by SNAP to find the HMM files. Set this +to the directory containing this file. For example, if you unpacked the tarball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap + +``` + setenv ZOE /usr/local/snap # csh, tcsh, etc +``` + or +``` + export ZOE=/usr/local/snap # sh, bash, etc +``` +To install the wrapper, copy the snap folder into the Galaxy tools directory and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example: +``` +<tool file="galaxy/tools/myTools/snap/snap.xml" /> +``` + + + + +
# File: gff2Togff3.py (new file in this changeset)
from Group import Group


class Convertor:
    """Convert a tab-separated GFF2-style file into a GFF3 file.

    Consecutive input rows that share the same value in column 8 (the ninth
    field) are treated as one gene and handed to ``Group`` for writing.
    """

    def __init__(self, input, output):
        """Read every data row of ``input`` and start the ``output`` file.

        Args:
            input: path of the tab-separated source file.
            output: path of the GFF3 file to create; the "##gff-version 3"
                header is written immediately.

        Blank lines and lines starting with "#" are skipped because they
        cannot be split into the nine columns the grouping relies on.
        """
        with open(input) as handle:
            self.li = [
                line.rstrip().split("\t")
                for line in handle
                if line.strip() and not line.startswith("#")
            ]
        self.gff3 = open(output, "w")
        self.gff3.write("##gff-version 3\n")

    def convert(self):
        """Write every gene group, then close the output file.

        The file is closed even when a group fails, so the handle is not
        leaked on malformed input.
        """
        try:
            index = 0
            while index < len(self.li):
                index = self.groupAsgene(index)
        finally:
            self.gff3.close()

    def groupAsgene(self, start=0):
        """Write the run of rows that share row ``start``'s gene name.

        Args:
            start: index in ``self.li`` of the first row of the group.

        Returns:
            The index of the first row after the group (``len(self.li)``
            when the group reaches the end of the input).
        """
        gene = self.li[start][8]
        end = start + 1
        while end < len(self.li) and self.li[end][8] == gene:
            end += 1
        Group(self.li[start:end]).writer(self.gff3)
        return end


if __name__ == "__main__":
    import sys

    # Optional command-line paths; the original hard-coded names remain the
    # defaults so running the script without arguments behaves as before.
    cli_args = sys.argv[1:]
    source_path = cli_args[0] if len(cli_args) > 0 else "dbia3.gff"
    target_path = cli_args[1] if len(cli_args) > 1 else "test.txt"
    Convertor(source_path, target_path).convert()
--- a/readme.rst Fri Jul 22 11:28:51 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -Galaxy wrapper for SNAP -======================== - -This wrapper is copyright 2016-2017 by Yating Liu - -This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acroynm for Semi-HMM-based Nucleic Acid -Parser. - -Reference ----------------------- - - Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59 - -Installation ------------------------ - -To install SNAP, please download SNAP from - -http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz - -and follow the installation instractions. The software is routinely compiled and tested on Mac OS X. It should compile -fine on any Linux/Unix type operating systems. -The default compiler is gcc. If you have gcc installed, the easiest is to just compile as: -``` - make -``` - -The ZOE environment variable is used by SNAP to find the HMM files. Set this -to the directory containing this file. For example, if you unpackaged the tar-ball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap - -``` - setenv ZOE /usr/local/snap # csh, tcsh, etc -``` - or -``` - export ZOE=/usr/local/snap # sh, bash, etc -``` -To install the wrapper copy the snap folder in the galaxy tools and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example: -``` -<tool file="galaxy/tools/myTools/snap/snap.xml" /> -``` - - - - -
--- a/snap.xml Fri Jul 22 11:28:51 2016 -0400 +++ b/snap.xml Fri Aug 12 12:03:46 2016 -0400 @@ -1,6 +1,6 @@ -<tool id="snap" name="Semi-HMM-based Nucleic Acid Parser (SNAP)" version="1.0"> +<tool id="snap" name="Semi-HMM-based Nucleic Acid Parser (SNAP)" version="0.1.0"> <requirements> - <requirement type="package" version="1.0">snap</requirement> + <requirement type="package" version="0.1.0">snap</requirement> </requirements> <stdio> <exit_code range="1:" />
--- a/tool_dependencies.xml Fri Jul 22 11:28:51 2016 -0400 +++ b/tool_dependencies.xml Fri Aug 12 12:03:46 2016 -0400 @@ -1,25 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="snap" version="1.0"> - <install version="1.0"> - <actions_group> - <actions> - <action type="download_by_url">http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz</action> - <action type="shell_command">make</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="set_environment"> - <environment_variable name="ZOE" action="set_to">$INSTALL_DIR</environment_variable> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable> - </action> - </actions> - </actions_group> - </install> - <readme>SNAP is a general purpose gene finding program suitable for both eukaryotic - and prokaryotic genomes. SNAP is an acroynm for Semi-HMM-based Nucleic Acid - Parser. - </readme> + <package name="snap" version="0.1.0"> + <repository changeset_revision="0bc612de916e" name="package_snap_0_1_0" owner="yating-l" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>