annotate sra.py @ 35:a93bb9eb93c5 draft

properly end conditional statements
author Matt Shirley <mdshw5@gmail.com>
date Sun, 30 Jun 2013 19:23:07 -0400
parents 423f3eb06428
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
1 """
22
423f3eb06428 more fixes for sra datatype, added sra_pileup
Matt Shirley <mdshw5@gmail.com>
parents: 20
diff changeset
2 NCBI sra class
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
3 """
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
4 import logging
12
b77840618b8f debugging SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 1
diff changeset
5 import binascii
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
6 from galaxy.datatypes.data import *
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
7 from galaxy.datatypes.sniff import *
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
8 from galaxy.datatypes.binary import *
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
9 from galaxy.datatypes.metadata import *
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
10
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
11 log = logging.getLogger(__name__)
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
12
22
423f3eb06428 more fixes for sra datatype, added sra_pileup
Matt Shirley <mdshw5@gmail.com>
parents: 20
diff changeset
class sra( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments unchanged to the Binary initializer. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """ Return True when the file at `filename` starts with the SRA magic.

        The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ
        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is treated as "not an sra file"
        and yields False rather than propagating.
        """
        try:
            # Binary mode avoids newline/encoding translation of the magic
            # bytes, and the context manager closes the handle even on error.
            with open( filename, 'rb' ) as handle:
                return handle.read( 8 ) == b'NCBI.sra'
        except Exception:
            # Sniffing stays best-effort, but KeyboardInterrupt/SystemExit are
            # no longer swallowed the way a bare ``except:`` would.
            log.debug( 'Could not sniff %s as sra', filename, exc_info=True )
            return False

    def set_peek( self, dataset, is_multi_byte=False ):
        """ Set `dataset.peek` and `dataset.blurb` for display.

        For a dataset that is not purged, the peek is a fixed label and the
        blurb is the result of data.nice_size() on the dataset size; for a
        purged dataset both fields are set to fixed "missing file" messages.
        `is_multi_byte` is accepted for interface compatibility and is unused.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """ Return `dataset.peek`, or a generic label with the size if reading it fails. """
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so interpreter-exit signals
            # propagate; ordinary failures still get the fallback label.
            return 'Binary sra file (%s)' % ( data.nice_size( dataset.get_size() ) )
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
44
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
# Register the sra sniffer only when the Binary base class exposes the
# registration hook (presumably absent on some Galaxy versions - confirm).
_register_sniffer = getattr(Binary, 'register_sniffable_binary_format', None)
if _register_sniffer is not None:
    _register_sniffer('sra', 'sra', sra)