Mercurial > repos > matt-shirley > sra_tools
comparison sra.py @ 22:423f3eb06428 draft
more fixes for sra datatype, added sra_pileup
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Wed, 19 Jun 2013 14:11:37 -0400 |
parents | a1255154fa3f |
children |
comparison
equal
deleted
inserted
replaced
21:d850324e82cf | 22:423f3eb06428 |
---|---|
1 """ | 1 """ |
2 SRA class | 2 NCBI sra class |
3 """ | 3 """ |
4 import logging | 4 import logging |
5 import binascii | 5 import binascii |
6 from galaxy.datatypes.data import * | 6 from galaxy.datatypes.data import * |
7 from galaxy.datatypes.sniff import * | 7 from galaxy.datatypes.sniff import * |
8 from galaxy.datatypes.binary import * | 8 from galaxy.datatypes.binary import * |
9 from galaxy.datatypes.metadata import * | 9 from galaxy.datatypes.metadata import * |
10 | 10 |
11 log = logging.getLogger(__name__) | 11 log = logging.getLogger(__name__) |
12 | 12 |
13 class SRA( Binary ): | 13 class sra( Binary ): |
14 """ Sequence Read Archive (SRA) """ | 14 """ Sequence Read Archive (SRA) """ |
15 file_ext = 'sra' | 15 file_ext = 'sra' |
16 | 16 |
17 def __init__( self, **kwd ): | 17 def __init__( self, **kwd ): |
18 Binary.__init__( self, **kwd ) | 18 Binary.__init__( self, **kwd ) |
19 def sniff( self, filename ): | 19 def sniff( self, filename ): |
20 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. | 20 """ The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ |
21 submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. | |
21 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure | 22 For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure |
22 """ | 23 """ |
23 try: | 24 try: |
24 header = open(filename).read(8) | 25 header = open(filename).read(8) |
25 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): | 26 if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): |
26 return True | 27 return True |
27 else: | 28 else: |
28 return False | 29 return False |
29 except: | 30 except: |
30 return False | 31 return False |
31 def set_peek(self, dataset, is_multi_byte=False): | 32 def set_peek(self, dataset, is_multi_byte=False): |
32 if not dataset.dataset.purged: | 33 if not dataset.dataset.purged: |
33 dataset.peek = 'Binary SRA file' | 34 dataset.peek = 'Binary sra file' |
34 dataset.blurb = data.nice_size(dataset.get_size()) | 35 dataset.blurb = data.nice_size(dataset.get_size()) |
35 else: | 36 else: |
36 dataset.peek = 'file does not exist' | 37 dataset.peek = 'file does not exist' |
37 dataset.blurb = 'file purged from disk' | 38 dataset.blurb = 'file purged from disk' |
38 def display_peek(self, dataset): | 39 def display_peek(self, dataset): |
39 try: | 40 try: |
40 return dataset.peek | 41 return dataset.peek |
41 except: | 42 except: |
42 return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size())) | 43 return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) |
43 | 44 |
44 if hasattr(Binary, 'register_sniffable_binary_format'): | 45 if hasattr(Binary, 'register_sniffable_binary_format'): |
45 Binary.register_sniffable_binary_format('SRA', 'SRA', SRA) | 46 Binary.register_sniffable_binary_format('sra', 'sra', sra) |