view sra.py @ 17:c57efec65750 draft

updates to SRA datatype
author Matt Shirley <mdshw5@gmail.com>
date Mon, 17 Jun 2013 20:37:28 -0400
parents 93a60318b9ca
children a1255154fa3f
line wrap: on
line source

"""
Galaxy datatype for Sequence Read Archive (SRA) files.
"""
import logging
import binascii
from galaxy.datatypes.data import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *
from galaxy.datatypes.metadata import *

log = logging.getLogger(__name__)

class SRA( Binary ):
    """ Sequence Read Archive (SRA) binary datatype. """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments to the Binary base initializer. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at ``filename`` starts with the SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. Not sure if EBI and DDBJ files may differ.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as False rather
        than raised, so a bad path never aborts sniffing.
        """
        try:
            # Binary mode avoids newline translation / text decoding of the
            # header, and the context manager guarantees the handle is closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except ( IOError, OSError, TypeError, ValueError ):
            # Unreadable file or unusable path: treat as "not an SRA file".
            return False
        # Compare raw bytes directly; a file shorter than 8 bytes can never match.
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set ``dataset.peek`` and ``dataset.blurb``.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the dataset size formatted by ``data.nice_size``; for a
        purged dataset both are fixed "missing file" messages.
        ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary SRA file'
            # NOTE(review): `data` is not imported by name in this file; it is
            # presumably brought into scope by one of the star imports -- confirm.
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return ``dataset.peek``, or a generic label with the dataset size
        if reading the peek fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback, but let KeyboardInterrupt/SystemExit through.
            return 'Binary SRA file (%s)' % ( data.nice_size( dataset.get_size() ) )

# Register the SRA class with Binary's sniffable-format registry at import time.
# The hasattr guard skips registration when Binary does not provide
# register_sniffable_binary_format (presumably older Galaxy releases -- confirm).
# NOTE(review): both string arguments are upper-case 'SRA' while SRA.file_ext is
# 'sra'; verify which case the registry expects for the type/extension.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('SRA', 'SRA', SRA)