Mercurial > repos > mons > genbank_complete_datatype
view datatype_complete/csequence.py @ 1:a4a890259b82 draft default tip
Deleted selected files
author | mons |
---|---|
date | Fri, 31 Oct 2014 09:58:22 -0400 |
parents | 0a6a8859078a |
children |
line wrap: on
line source
import logging

from galaxy.datatypes import data
from galaxy.datatypes.metadata import MetadataElement

log = logging.getLogger(__name__)


class GenBank(data.Text):
    """Text datatype for GenBank files.

    A file is recognised as GenBank when it begins with ``LOCUS``; the
    number of lines starting with ``LOCUS`` is recorded in the
    ``number_of_sequences`` metadata element.
    """
    file_ext = "genbank"

    MetadataElement(name="number_of_sequences", default=0,
                    desc="Number of sequences", readonly=True,
                    visible=True, optional=True, no_value=0)

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in ``dataset.blurb`` and ``dataset.peek``.

        For a dataset that has not been purged the blurb reports the
        sequence count taken from the metadata and the peek is produced by
        ``data.get_file_peek``.  For a purged dataset both fields are set
        to fixed placeholder messages.
        """
        if not dataset.dataset.purged:
            # Use the singular form only when exactly one record was counted.
            if dataset.metadata.number_of_sequences == 1:
                dataset.blurb = "1 sequence"
            else:
                dataset.blurb = "%s sequences" % dataset.metadata.number_of_sequences
            dataset.peek = data.get_file_peek(dataset.file_name,
                                              is_multi_byte=is_multi_byte)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def get_mime(self):
        """Return the MIME type used for this datatype (``text/plain``)."""
        return 'text/plain'

    def sniff(self, filename):
        """Return True when the file at ``filename`` starts with ``LOCUS``.

        Only the first five bytes are inspected.
        """
        # Read in binary mode so that content which is not valid text cannot
        # raise a decoding error on Python 3, and use a context manager so
        # the handle is closed instead of being leaked.
        with open(filename, 'rb') as handle:
            return handle.read(5) == b'LOCUS'

    def set_meta(self, dataset, **kwd):
        """
        Set the number of sequences in dataset.
        """
        dataset.metadata.number_of_sequences = self._count_genbank_sequences(dataset.file_name)

    def _count_genbank_sequences(self, filename):
        """
        Return the number of lines in ``filename`` that start with ``LOCUS``.

        This is not a perfect definition, but should suffice for general
        usage.  It fails to detect any errors that would result in parsing
        errors like incomplete files.
        """
        # Specification for the genbank file format can be found in
        # ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
        # in section 3.4.4 LOCUS Format
        with open(filename) as gbk:
            return sum(1 for line in gbk if line.startswith('LOCUS'))