comparison sra.py @ 22:423f3eb06428 draft

more fixes for sra datatype, added sra_pileup
author Matt Shirley <mdshw5@gmail.com>
date Wed, 19 Jun 2013 14:11:37 -0400
parents a1255154fa3f
children
comparison
equal deleted inserted replaced
21:d850324e82cf 22:423f3eb06428
1 """ 1 """
2 SRA class 2 NCBI sra class
3 """ 3 """
4 import logging 4 import logging
5 import binascii 5 import binascii
6 from galaxy.datatypes.data import * 6 from galaxy.datatypes.data import *
7 from galaxy.datatypes.sniff import * 7 from galaxy.datatypes.sniff import *
8 from galaxy.datatypes.binary import * 8 from galaxy.datatypes.binary import *
9 from galaxy.datatypes.metadata import * 9 from galaxy.datatypes.metadata import *
10 10
11 log = logging.getLogger(__name__) 11 log = logging.getLogger(__name__)
12 12
13 class SRA( Binary ): 13 class sra( Binary ):
14 """ Sequence Read Archive (SRA) """ 14 """ Sequence Read Archive (SRA) """
15 file_ext = 'sra' 15 file_ext = 'sra'
16 16
17 def __init__( self, **kwd ): 17 def __init__( self, **kwd ):
18 Binary.__init__( self, **kwd ) 18 Binary.__init__( self, **kwd )
19 def sniff( self, filename ): 19 def sniff( self, filename ):
20 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. 20 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
21 submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
21 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure 22 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
22 """ 23 """
23 try: 24 try:
24 header = open(filename).read(8) 25 header = open(filename).read(8)
25 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): 26 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
28 return False 29 return False
29 except: 30 except:
30 return False 31 return False
31 def set_peek(self, dataset, is_multi_byte=False): 32 def set_peek(self, dataset, is_multi_byte=False):
32 if not dataset.dataset.purged: 33 if not dataset.dataset.purged:
33 dataset.peek = 'Binary SRA file' 34 dataset.peek = 'Binary sra file'
34 dataset.blurb = data.nice_size(dataset.get_size()) 35 dataset.blurb = data.nice_size(dataset.get_size())
35 else: 36 else:
36 dataset.peek = 'file does not exist' 37 dataset.peek = 'file does not exist'
37 dataset.blurb = 'file purged from disk' 38 dataset.blurb = 'file purged from disk'
38 def display_peek(self, dataset): 39 def display_peek(self, dataset):
39 try: 40 try:
40 return dataset.peek 41 return dataset.peek
41 except: 42 except:
42 return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size())) 43 return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
43 44
44 if hasattr(Binary, 'register_sniffable_binary_format'): 45 if hasattr(Binary, 'register_sniffable_binary_format'):
45 Binary.register_sniffable_binary_format('SRA', 'SRA', SRA) 46 Binary.register_sniffable_binary_format('sra', 'sra', sra)