view sra.py @ 40:f1b83804013f draft

add option to take plaintext file with accession on one line
author Matt Shirley <mdshw5@gmail.com>
date Mon, 05 Aug 2013 09:40:26 -0400
parents 423f3eb06428
children
line wrap: on
line source

"""
NCBI sra class
"""
import logging
import binascii
from galaxy.datatypes.data import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *
from galaxy.datatypes.metadata import *

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

class sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'
    # Magic signature expected in the first 8 bytes of the file.
    _magic = b'NCBI.sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments unchanged to the Binary initializer. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """ Return True if the file at `filename` starts with the SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Returns False when the file is shorter than the signature, does not
        match it, or cannot be opened or read.
        """
        try:
            # Read raw bytes: text mode could translate newlines or fail to
            # decode binary content, and the handle must always be closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( len( self._magic ) )
        except (IOError, OSError):
            # An unreadable file is reported as "not sra" rather than raised.
            return False
        return header == self._magic

    def set_peek(self, dataset, is_multi_byte=False):
        """ Set `dataset.peek` and `dataset.blurb`.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the size formatted by data.nice_size; for a purged dataset
        both are fixed placeholder messages. `is_multi_byte` is accepted but
        not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek  = 'Binary sra file'
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """ Return `dataset.peek`, or a label built from the dataset size if reading the peek fails. """
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt and SystemExit
            # still propagate; any ordinary failure keeps the fallback label.
            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))

# Register the sra class for binary sniffing, but only when Binary provides
# register_sniffable_binary_format.
# NOTE(review): presumably the guard is for Galaxy versions that lack this
# hook -- confirm against the supported Galaxy releases.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('sra', 'sra', sra)