Mercurial > repos > matt-shirley > sra_tools
view sra.py @ 29:d5f7232e67d9 draft
debug prefetch caching and accession input for fastq_dump
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Sat, 29 Jun 2013 16:01:28 -0400 |
parents | 423f3eb06428 |
children |
line wrap: on
line source
""" NCBI sra class """ import logging import binascii from galaxy.datatypes.data import * from galaxy.datatypes.sniff import * from galaxy.datatypes.binary import * from galaxy.datatypes.metadata import * log = logging.getLogger(__name__) class sra( Binary ): """ Sequence Read Archive (SRA) """ file_ext = 'sra' def __init__( self, **kwd ): Binary.__init__( self, **kwd ) def sniff( self, filename ): """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure """ try: header = open(filename).read(8) if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): return True else: return False except: return False def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: dataset.peek = 'Binary sra file' dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek(self, dataset): try: return dataset.peek except: return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) if hasattr(Binary, 'register_sniffable_binary_format'): Binary.register_sniffable_binary_format('sra', 'sra', sra)