0
|
1 """
|
|
2 Sra class
|
|
3 """
|
|
4
|
|
5 import galaxy.datatypes.binary
|
|
6 from galaxy.datatypes.binary import Binary
|
|
7 import data, logging, binascii
|
|
8 from galaxy.datatypes.metadata import MetadataElement
|
|
9 from galaxy.datatypes import metadata
|
|
10 from galaxy.datatypes.sniff import *
|
|
11 from galaxy import eggs
|
|
12 import pkg_resources
|
|
13 pkg_resources.require( "bx-python" )
|
|
14 import os, subprocess, tempfile
|
|
15 import struct
|
|
16
|
|
class Sra( Binary ):
    """
    Sequence Read Archive (SRA) binary datatype.

    Files are recognised by the 8-byte magic string found at the start of
    NCBI-produced archives (see `sniff`).
    """
    file_ext = "sra"

    def __init__( self, **kwd ):
        """Forward all keyword arguments unchanged to Binary.__init__."""
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at `filename` starts with the NCBI SRA magic bytes.

        The first 8 bytes of any NCBI sra file is 'NCBI.sra', and the file is
        binary. EBI and DDBJ files may differ. For details about the format, see
        http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure

        Sniffing is best-effort: any error raised while opening or reading the
        file is reported as "not an SRA file" (False) rather than propagated.
        """
        magic = b'NCBI.sra'
        try:
            # Open in binary mode so the header bytes are read verbatim (no
            # newline translation or text decoding); the `with` block
            # guarantees the handle is closed even if the read fails.
            with open( filename, 'rb' ) as handle:
                return handle.read( len( magic ) ) == magic
        except Exception:
            # Unreadable or missing file: treat as a non-match. Unlike a bare
            # `except:`, this lets KeyboardInterrupt / SystemExit through.
            return False

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Populate `dataset.peek` and `dataset.blurb`.

        When the underlying dataset has not been purged, the peek is a fixed
        description and the blurb is the size formatted by `data.nice_size`;
        otherwise both are set to fixed "purged" placeholder messages.
        `is_multi_byte` is accepted but not used by this implementation.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Binary sra file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return `dataset.peek`, falling back to a generic description that
        includes the formatted dataset size if reading the peek raises.
        """
        try:
            return dataset.peek
        except Exception:
            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )
|
|
45
|
|
# Hand the Sra class to Binary.register_sniffable_binary_format so it takes
# part in binary sniffing.
# NOTE(review): both string arguments are "sra" -- presumably the datatype
# name and the file extension; confirm against the method's signature.
Binary.register_sniffable_binary_format(
    "sra",
    "sra",
    Sra
)
|