Mercurial > repos > mons > genbank_complete_datatype
changeset 0:0a6a8859078a draft
Uploaded
author | mons |
---|---|
date | Fri, 31 Oct 2014 05:21:47 -0400 |
parents | |
children | a4a890259b82 |
files | datatype_complete/README.txt datatype_complete/README.txt~ datatype_complete/csequence.py datatype_complete/datatypes_conf.xml |
diffstat | 4 files changed, 91 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatype_complete/README.txt Fri Oct 31 05:21:47 2014 -0400 @@ -0,0 +1,15 @@ + +---------------- +GenBank datatype +---------------- + +This package contains the GenBank datatype with sniffer. + +-------------------------------------------------------------------- +Instructions for integration of the GenBank datatype into the workflow-system +Galaxy (http://getgalaxy.org) +-------------------------------------------------------------------- + +For best results, we recommend that you install it via the toolshed. + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatype_complete/README.txt~ Fri Oct 31 05:21:47 2014 -0400 @@ -0,0 +1,15 @@ + +---------------- +GenBank datatype +---------------- + +This package contains the datatype genbank without sniffer. + +-------------------------------------------------------------------- +Instructions for integration of the GenBank datatype into the workflow-system +Galaxy (http://getgalaxy.org) +-------------------------------------------------------------------- + +For best results, we recommend that you install it via the toolshed. + +
# File: datatype_complete/csequence.py (added Fri Oct 31 05:21:47 2014 -0400)
from galaxy.datatypes import data
from galaxy.datatypes.metadata import MetadataElement
import logging
log = logging.getLogger(__name__)


class GenBank( data.Text ):
    """
    Text datatype for GenBank flat files, registered under the "genbank" extension.

    Records are recognised by lines that start with the keyword LOCUS; the
    number of such lines is stored in the ``number_of_sequences`` metadata
    element.
    """
    file_ext = "genbank"

    MetadataElement( name="number_of_sequences", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value=0 )

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        For a purged dataset both fields are set to fixed placeholder
        messages; otherwise the blurb reports the sequence count taken from
        the metadata and the peek is obtained from ``data.get_file_peek``.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        # Singular wording only for exactly one record.
        if dataset.metadata.number_of_sequences == 1:
            dataset.blurb = "1 sequence"
        else:
            dataset.blurb = "%s sequences" % dataset.metadata.number_of_sequences
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )

    def get_mime( self ):
        """Return the MIME type used for this datatype ('text/plain')."""
        return 'text/plain'

    def sniff( self, filename ):
        """
        Return True when the file starts with the five characters 'LOCUS'.

        The file is read as bytes so that probing a non-text file cannot fail
        with a decoding error, and the handle is closed as soon as the header
        has been read.
        """
        with open( filename, 'rb' ) as handle:
            return handle.read( 5 ) == b'LOCUS'

    def set_meta( self, dataset, **kwd ):
        """
        Set the number of sequences in dataset.
        """
        dataset.metadata.number_of_sequences = self._count_genbank_sequences( dataset.file_name )

    def _count_genbank_sequences( self, filename ):
        """
        Return the number of lines in ``filename`` that start with 'LOCUS'.

        This is not a perfect definition, but should suffice for general usage. It fails to detect any
        errors that would result in parsing errors like incomplete files.
        """
        # Specification for the genbank file format can be found in
        # ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
        # in section 3.4.4 LOCUS Format
        with open( filename ) as gbk:
            return sum( 1 for line in gbk if line.startswith( 'LOCUS' ) )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatype_complete/datatypes_conf.xml Fri Oct 31 05:21:47 2014 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="csequence.py"/> + </datatype_files> + <registration> + <datatype extension="genbank" type="galaxy.datatypes.csequence:GenBank" display_in_upload="true" /> + </registration> +</datatypes>