Mercurial > repos > matt-shirley > sra_tools
view sra.py @ 17:c57efec65750 draft
updates to SRA datatype
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Mon, 17 Jun 2013 20:37:28 -0400 |
parents | 93a60318b9ca |
children | a1255154fa3f |
line wrap: on
line source
"""
Sra class

Galaxy datatype definition for Sequence Read Archive (SRA) files.
"""
import logging
import binascii
from galaxy.datatypes.data import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *
from galaxy.datatypes.metadata import *

log = logging.getLogger(__name__)


class SRA( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments unchanged to ``Binary.__init__``. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at ``filename`` starts with the SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. Not sure if EBI and DDBJ files may differ.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Returns False (never raises an ordinary exception) when the file
        cannot be opened or read, or when the header does not match.
        """
        try:
            # Binary mode: the header must be compared byte-for-byte, with no
            # newline translation or text decoding applied. The context
            # manager guarantees the handle is closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply "not SRA".
            return False
        # A file shorter than 8 bytes yields a shorter header and compares unequal.
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set ``dataset.peek`` and ``dataset.blurb`` for display.

        ``is_multi_byte`` is accepted but not used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary SRA file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return ``dataset.peek``; if reading it fails, fall back to a generic
        description that includes the dataset size.
        """
        try:
            return dataset.peek
        except Exception:
            return 'Binary SRA file (%s)' % ( data.nice_size( dataset.get_size() ) )


# Register the sniffer only when the Binary base class provides this hook.
if hasattr( Binary, 'register_sniffable_binary_format' ):
    Binary.register_sniffable_binary_format( 'SRA', 'SRA', SRA )