Mercurial > repos > mons > genbank_complete_datatype

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatype_complete/README.txt	Fri Oct 31 05:21:47 2014 -0400
@@ -0,0 +1,15 @@
+
+----------------
+GenBank datatype
+----------------
+
+This package contains the GenBank datatype with sniffer.
+
+--------------------------------------------------------------------
+Instructions for integration of GenBank datatype into the workflow-system
+Galaxy (http://getgalaxy.org)
+--------------------------------------------------------------------
+
+For best results, we recommend that you install it via the Galaxy Tool Shed.
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatype_complete/README.txt~	Fri Oct 31 05:21:47 2014 -0400
@@ -0,0 +1,15 @@
+
+----------------
+GenBank datatype
+----------------
+
+This package contains the datatype genbank without sniffer.
+
+--------------------------------------------------------------------
+Instructions for integration of GenBank datatype into the workflow-system
+Galaxy (http://getgalaxy.org)
+--------------------------------------------------------------------
+
+For best results, we recommend that you install it via the Galaxy Tool Shed.
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatype_complete/csequence.py	Fri Oct 31 05:21:47 2014 -0400
@@ -0,0 +1,52 @@
+from galaxy.datatypes import data
+from galaxy.datatypes.metadata import MetadataElement
+import logging
+log = logging.getLogger(__name__)
+
+
+class GenBank( data.Text ):
+    file_ext = "genbank"
+
+    MetadataElement( name="number_of_sequences", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value=0 )
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            # Add our blurb
+            if (dataset.metadata.number_of_sequences == 1):
+                dataset.blurb = "1 sequence"
+            else:
+                dataset.blurb = "%s sequences" % dataset.metadata.number_of_sequences
+            # Get
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def get_mime(self):
+        return 'text/plain'
+
+    def sniff( self, filename ):
+        header = open(filename).read(5)
+        return header == 'LOCUS'
+
+    def set_meta( self, dataset, **kwd ):
+        """
+        Set the number of sequences in dataset.
+        """
+        dataset.metadata.number_of_sequences = self._count_genbank_sequences( dataset.file_name )
+
+    def _count_genbank_sequences( self, filename ):
+        """
+        This is not a perfect definition, but should suffice for general usage. It fails to detect any
+        errors that would result in parsing errors like incomplete files.
+        """
+        # Specification for the genbank file format can be found in
+        # ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
+        # in section 3.4.4 LOCUS Format
+        count = 0
+        with open( filename ) as gbk:
+            for line in gbk:
+                if line[0:5] == 'LOCUS':
+                    count += 1
+        return count
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatype_complete/datatypes_conf.xml	Fri Oct 31 05:21:47 2014 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="csequence.py"/> <!-- Python module that defines the GenBank class -->
+    </datatype_files>
+    <registration>
+		<datatype extension="genbank" type="galaxy.datatypes.csequence:GenBank" display_in_upload="true" /> <!-- extension matches file_ext of the GenBank class in csequence.py -->
+    </registration>
+</datatypes>