Mercurial > repos > matt-shirley > sra_tools
annotate sra.py @ 8:e40b9582d80f draft
debugging automatic SRA build and install
| author | Matt Shirley <mdshw5@gmail.com> |
|---|---|
| date | Mon, 17 Jun 2013 14:00:25 -0400 |
| parents | 28fda7631857 |
| children | b77840618b8f |
| rev | line source |
|---|---|
|
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
1 """ |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
2 Sra class |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
3 """ |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
4 |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
5 import galaxy.datatypes.binary |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
6 from galaxy.datatypes.binary import Binary |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
7 import data, logging, binascii |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
8 from galaxy.datatypes.metadata import MetadataElement |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
9 from galaxy.datatypes import metadata |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
10 from galaxy.datatypes.sniff import * |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
11 from galaxy import eggs |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
12 import pkg_resources |
| 1 | 13 pkg_resources.require( 'bx-python' ) |
|
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
14 import os, subprocess, tempfile |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
15 import struct |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
16 |
|
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
class Sra( Binary ):
    """ Sequence Read Archive (SRA) binary datatype. """
    file_ext = 'sra'

    def __init__( self, **kwd ):
        """ Forward all keyword arguments to the Binary base class. """
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """ Return True if the file at `filename` starts with the SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is binary.
        Not sure if EBI and DDBJ files may differ.
        For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Any error while opening or reading the file is treated as "not an
        sra file" and yields False rather than propagating.
        """
        try:
            # Read in binary mode so the magic bytes are not subject to
            # newline translation or text decoding, and use a context
            # manager so the handle is always closed.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 8 )
        except Exception:
            # Sniffing is best-effort: an unreadable file is simply not a match.
            return False
        return header == b'NCBI.sra'

    def set_peek( self, dataset, is_multi_byte=False ):
        """ Set the peek and blurb text on `dataset`.

        When the underlying dataset has not been purged, the peek is a fixed
        label and the blurb is the result of data.nice_size() applied to the
        dataset size; otherwise both are set to fixed "purged" messages.
        `is_multi_byte` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = 'Binary sra file'
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """ Return the stored peek for `dataset`, or a generated label if reading it fails. """
        try:
            return dataset.peek
        except Exception:
            # Fall back to a label built from the dataset size.
            return 'Binary sra file (%s)' % ( data.nice_size( dataset.get_size() ) )
|
0
ffdd41766195
Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff
changeset
|
46 |
| 1 | 47 Binary.register_sniffable_binary_format('sra', 'sra', Sra) |
