Mercurial > repos > jjohnson > snpeff_datatypes
view snpeff.py @ 4:5d6f3622b99d default tip
Default to snpeff_version: SnpEff4.0
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Wed, 14 Jan 2015 11:53:36 -0600 |
parents | 9efd0d32fe8a |
children |
line wrap: on
line source
""" SnpEff datatypes """ import os,os.path,re,sys,gzip,logging import galaxy.datatypes.data from galaxy.datatypes.data import Text from galaxy.datatypes.metadata import MetadataElement log = logging.getLogger(__name__) class SnpEffDb( Text ): """Class describing a SnpEff genome build""" file_ext = "snpeffdb" MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None ) MetadataElement( name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None ) MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True) MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True) def __init__( self, **kwd ): Text.__init__( self, **kwd ) def getSnpeffVersionFromFile(self, path): snpeff_version = None try: fh = gzip.open(path, 'rb') buf = fh.read(100) lines = buf.splitlines() m = re.match('^(SnpEff)\s+(\d+\.\d+).*$',lines[0].strip()) if m: snpeff_version = m.groups()[0] + m.groups()[1] fh.close() except Exception, e: pass return snpeff_version def set_meta( self, dataset, **kwd ): Text.set_meta(self, dataset, **kwd ) data_dir = dataset.extra_files_path ## search data_dir/genome_version for files regulation_pattern = 'regulation_(.+).bin' # annotation files that are included in snpEff by a flag annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} regulations = [] annotations = [] genome_version = None snpeff_version = None if data_dir and os.path.isdir(data_dir): for root, dirs, files in os.walk(data_dir): for fname in files: if fname.startswith('snpEffectPredictor'): # if snpEffectPredictor.bin download succeeded genome_version = os.path.basename(root) dataset.metadata.genome_version = genome_version # read the first line of the gzipped snpEffectPredictor.bin file to get the SnpEff version # Starting with version 4.1, this should be: SnpEff 4.1 snpeff_version = self.getSnpeffVersionFromFile(os.path.join(root,fname)) if snpeff_version: dataset.metadata.snpeff_version = snpeff_version else: m = re.match(regulation_pattern,fname) if m: name = m.groups()[0] regulations.append(name) elif fname in annotations_dict: value = annotations_dict[fname] name = value.lstrip('-') annotations.append(name) dataset.metadata.regulation = regulations dataset.metadata.annotation = annotations try: fh = file(dataset.file_name,'w') fh.write("%s\n" % genome_version if genome_version else 'Genome unknown') fh.write("%s\n" % snpeff_version if snpeff_version else 'SnpEff version unknown') if annotations: fh.write("annotations: %s\n" % ','.join(annotations)) if regulations: fh.write("regulations: %s\n" % ','.join(regulations)) fh.close() except: pass