diff sra.py @ 22:423f3eb06428 draft

more fixes for sra datatype, added sra_pileup
author Matt Shirley <mdshw5@gmail.com>
date Wed, 19 Jun 2013 14:11:37 -0400
parents a1255154fa3f
children
line wrap: on
line diff
--- a/sra.py	Wed Jun 19 13:31:58 2013 -0400
+++ b/sra.py	Wed Jun 19 14:11:37 2013 -0400
@@ -1,5 +1,5 @@
 """
-SRA class
+NCBI sra class
 """
 import logging
 import binascii
@@ -10,14 +10,15 @@
 
 log = logging.getLogger(__name__)
 
-class SRA( Binary ):
+class sra( Binary ):
     """ Sequence Read Archive (SRA) """
     file_ext = 'sra'
 
     def __init__( self, **kwd ):
         Binary.__init__( self, **kwd )
     def sniff( self, filename ):
-        """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. 
+        """ The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ 
+        submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'.
         For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure 
         """
         try:
@@ -30,7 +31,7 @@
             return False
     def set_peek(self, dataset, is_multi_byte=False):
         if not dataset.dataset.purged:
-            dataset.peek  = 'Binary SRA file'
+            dataset.peek  = 'Binary sra file'
             dataset.blurb = data.nice_size(dataset.get_size())
         else:
             dataset.peek = 'file does not exist'
@@ -39,7 +40,7 @@
         try:
             return dataset.peek
         except:
-            return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size()))
+            return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size()))
 
 if hasattr(Binary, 'register_sniffable_binary_format'):
-    Binary.register_sniffable_binary_format('SRA', 'SRA', SRA)
+    Binary.register_sniffable_binary_format('sra', 'sra', sra)