Mercurial > repos > matt-shirley > sra_tools
annotate sra.py @ 13:45031bbf6b27 draft
better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Mon, 17 Jun 2013 16:17:33 -0400 |
parents | b77840618b8f |
children | 93a60318b9ca |
rev | line source |
---|---|
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
1 """ |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
2 Sra class |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
3 """ |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
4 |
12 | 5 import binascii |
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
6 from galaxy.datatypes.binary import Binary |
12 | 7 from galaxy.datatypes.data import Data |
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
8 from galaxy.datatypes.metadata import MetadataElement |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
9 from galaxy.datatypes.sniff import * |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
10 |
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
class Sra(Binary):
    """Sequence Read Archive (SRA) binary datatype."""

    file_ext = 'sra'

    # Leading bytes that sniff() expects at the start of an SRA file.
    _MAGIC = b'NCBI.sra'

    def __init__(self, **kwd):
        """Forward all keyword arguments to ``Binary.__init__``."""
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Return True if the file at *filename* starts with ``NCBI.sra``.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file
        is binary. Not sure if EBI and DDBJ files may differ.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any failure to open or read the file is reported as False rather
        than raised.
        """
        try:
            # Binary mode: the header is compared byte-for-byte, and the
            # context manager guarantees the handle is closed.
            with open(filename, 'rb') as handle:
                return handle.read(len(self._MAGIC)) == self._MAGIC
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply not SRA.
            return False

    @staticmethod
    def _size_text(dataset):
        """Return the human-readable size string for *dataset*."""
        # Imported locally so the name is bound regardless of what the
        # module-level imports provide (only ``Data`` is imported from
        # galaxy.datatypes.data at the top of the file).
        from galaxy.datatypes.data import nice_size
        return nice_size(dataset.get_size())

    def set_peek(self, dataset, is_multi_byte=False):
        """Set ``dataset.peek`` and ``dataset.blurb``.

        When the underlying dataset has not been purged, the peek is a
        fixed label and the blurb is the formatted file size; otherwise
        both are set to fixed "purged" messages. *is_multi_byte* is
        accepted for interface compatibility and is not used here.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = 'Binary SRA file'
        dataset.blurb = self._size_text(dataset)

    def display_peek(self, dataset):
        """Return ``dataset.peek``, or a size-based label if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            return 'Binary sra file (%s)' % (self._size_text(dataset))