Mercurial > repos > matt-shirley > sra_tools
diff sra.py @ 22:423f3eb06428 draft
more fixes for sra datatype, added sra_pileup
author | Matt Shirley <mdshw5@gmail.com> |
---|---|
date | Wed, 19 Jun 2013 14:11:37 -0400 |
parents | a1255154fa3f |
children |
line wrap: on
line diff
--- a/sra.py Wed Jun 19 13:31:58 2013 -0400 +++ b/sra.py Wed Jun 19 14:11:37 2013 -0400 @@ -1,5 +1,5 @@ """ -SRA class +NCBI sra class """ import logging import binascii @@ -10,14 +10,15 @@ log = logging.getLogger(__name__) -class SRA( Binary ): +class sra( Binary ): """ Sequence Read Archive (SRA) """ file_ext = 'sra' def __init__( self, **kwd ): Binary.__init__( self, **kwd ) def sniff( self, filename ): - """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. Not sure if EBI and DDBJ files may differ. + """ The first 8 bytes of any NCBI sra file is 'NCIB.sra', and the file is binary. EBI and DDBJ files may differ, though EBI and DDBJ + submissions through NCBI (ERR and DRR accessions) read 'NCBI.sra'. For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure """ try: @@ -30,7 +31,7 @@ return False def set_peek(self, dataset, is_multi_byte=False): if not dataset.dataset.purged: - dataset.peek = 'Binary SRA file' + dataset.peek = 'Binary sra file' dataset.blurb = data.nice_size(dataset.get_size()) else: dataset.peek = 'file does not exist' @@ -39,7 +40,7 @@ try: return dataset.peek except: - return 'Binary SRA file (%s)' % ( data.nice_size(dataset.get_size())) + return 'Binary sra file (%s)' % ( data.nice_size(dataset.get_size())) if hasattr(Binary, 'register_sniffable_binary_format'): - Binary.register_sniffable_binary_format('SRA', 'SRA', SRA) + Binary.register_sniffable_binary_format('sra', 'sra', sra)