Mercurial > repos > matt-shirley > sra_tools
annotate sra.py @ 35:a93bb9eb93c5 draft
properly end conditional statements
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Sun, 30 Jun 2013 19:23:07 -0400 |
parents | 423f3eb06428 |
children |
rev | line source |
---|---|
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
1 """ |
22
423f3eb06428
more fixes for sra datatype, added sra_pileup
Matt Shirley <mdshw5@gmail.com>
parents:
20
diff
changeset
|
2 NCBI sra class |
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
3 """ |
16 | 4 import logging |
12 | 5 import binascii |
16 | 6 from galaxy.datatypes.data import * |
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
7 from galaxy.datatypes.sniff import * |
16 | 8 from galaxy.datatypes.binary import * |
9 from galaxy.datatypes.metadata import * | |
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
10 |
16 | 11 log = logging.getLogger(__name__) |
12 | |
22
423f3eb06428
more fixes for sra datatype, added sra_pileup
Matt Shirley <mdshw5@gmail.com>
parents:
20
diff
changeset
|
class sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to ``Binary.__init__``."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """Return True if ``filename`` begins with the NCBI SRA magic bytes.

        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file
        is binary. EBI and DDBJ files may differ, though EBI and DDBJ
        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file is reported as
        "not an sra file" (False) rather than raised.
        """
        try:
            # Read in binary mode so no newline translation or text decoding
            # can alter the header, and close the handle deterministically.
            with open(filename, 'rb') as handle:
                header = handle.read(8)
        except Exception:
            # Best effort: an unreadable or missing file simply does not match.
            return False
        # Compare raw bytes directly; this behaves the same on Python 2 and 3.
        return header == b'NCBI.sra'

    def set_peek(self, dataset, is_multi_byte=False):
        """Set ``dataset.peek`` and ``dataset.blurb`` for the history panel.

        When the underlying dataset has not been purged, the peek is a fixed
        label and the blurb is the human-readable size of the dataset;
        otherwise both are set to messages saying the file is gone.
        ``is_multi_byte`` is accepted for interface compatibility and unused.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a generic label with size if that fails."""
        try:
            return dataset.peek
        except Exception:
            # Fall back to a description built from the dataset size.
            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
16 | 44 |
# Register the sra class with Binary's sniffable-format registry, but only
# when the Binary class in use actually exposes that registration hook.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('sra', 'sra', sra)