annotate sra.py @ 21:d850324e82cf draft

fix datatype reference, increment versions
author Matt Shirley <mdshw5@gmail.com>
date Wed, 19 Jun 2013 13:31:58 -0400
parents a1255154fa3f
children 423f3eb06428
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
1 """
20
a1255154fa3f tweaks to fastq-dump command
Matt Shirley <mdshw5@gmail.com>
parents: 16
diff changeset
2 SRA class
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
3 """
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
4 import logging
12
b77840618b8f debugging SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 1
diff changeset
5 import binascii
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
6 from galaxy.datatypes.data import *
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
7 from galaxy.datatypes.sniff import *
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
8 from galaxy.datatypes.binary import *
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
9 from galaxy.datatypes.metadata import *
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
10
16
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
11 log = logging.getLogger(__name__)
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
12
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
class SRA( Binary ):
    """ Sequence Read Archive (SRA) """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments unchanged to the Binary initializer. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """ Return True when the file at `filename` starts with the SRA magic string.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary. Not sure if EBI and DDBJ files may differ.
        For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file is treated as "not an SRA file"
        and yields False rather than propagating.
        """
        try:
            # Read in binary mode so the header bytes are never translated or decoded,
            # and use a context manager so the handle is always closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except Exception:
            # Sniffing is best-effort: an unreadable file simply does not match.
            return False
        # A direct bytes comparison replaces the former hexlify round trip.
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """ Set the peek and blurb text on `dataset`.

        A dataset that has not been purged gets a fixed description as its peek and
        its human-readable size as its blurb; a purged one gets placeholder text.
        `is_multi_byte` is accepted for signature compatibility and is not used.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary SRA file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """ Return the stored peek of `dataset`, or a generated description if it cannot be read. """
        try:
            return dataset.peek
        except Exception:
            # Fall back to a description built from the dataset size.
            return 'Binary SRA file (%s)' % ( data.nice_size( dataset.get_size() ) )
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
43
93a60318b9ca updates to SRA datatype
Matt Shirley <mdshw5@gmail.com>
parents: 12
diff changeset
# Register SRA with Binary for sniffing, but only when this Binary class
# provides register_sniffable_binary_format; otherwise do nothing.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('SRA', 'SRA', SRA)