Mercurial > repos > jjohnson > snpeff_datatypes
comparison snpeff.py @ 3:9efd0d32fe8a
Add snpeff_version to the snpeffdb metadata; the version is available from SnpEff v4.1 onwards.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 13 Jan 2015 12:30:20 -0600 |
parents | lib/galaxy/datatypes/snpeff.py@cd8f8c54bf9a |
children | 5d6f3622b99d |
comparison
equal
deleted
inserted
replaced
2:b33911fdbac4 | 3:9efd0d32fe8a |
---|---|
1 """ | |
2 SnpEff datatypes | |
3 """ | |
4 import os,os.path,re,sys,gzip,logging | |
5 import galaxy.datatypes.data | |
6 from galaxy.datatypes.data import Text | |
7 from galaxy.datatypes.metadata import MetadataElement | |
8 | |
9 log = logging.getLogger(__name__) | |
10 | |
class SnpEffDb( Text ):
    """Class describing a SnpEff genome build.

    The dataset's extra files directory is scanned for SnpEff database
    files. From them the following metadata is derived:

    * ``genome_version`` - name of the directory that contains a
      ``snpEffectPredictor*`` file.
    * ``snpeff_version`` - ``SnpEff<major.minor>`` parsed from the first line
      of that (gzipped) predictor file, when it can be read.
    * ``regulation`` - names captured from ``regulation_<name>.bin`` files.
    * ``annotation`` - names of the optional annotation files that are
      present (``nextProt.bin`` -> ``nextprot``, ``motif.bin`` -> ``motif``).
    """
    file_ext = "snpeffdb"
    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="snpeff_version", default=None, desc="SnpEff Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )

    def getSnpeffVersionFromFile(self, path):
        """Return the SnpEff version recorded at the start of a gzipped file.

        Only the first 100 decompressed bytes are read. If the first line
        looks like ``SnpEff <major>.<minor>...`` the result is the string
        ``'SnpEff<major>.<minor>'`` (for example ``'SnpEff4.1'``).

        :param path: path of the gzipped ``snpEffectPredictor`` file.
        :returns: the version string, or ``None`` when the file cannot be
                  read, is empty, or its first line does not match.
        """
        snpeff_version = None
        try:
            fh = gzip.open(path, 'rb')
            try:
                buf = fh.read(100)
            finally:
                # Always release the handle, even when decompression fails.
                fh.close()
            lines = buf.splitlines()
            if lines:
                # The data is read as bytes, so match with a bytes pattern.
                m = re.match(br'^(SnpEff)\s+(\d+\.\d+).*$', lines[0].strip())
                if m:
                    snpeff_version = m.group(1) + m.group(2)
                    if not isinstance(snpeff_version, str):
                        # Python 3: bytes -> text (the match is ASCII only).
                        snpeff_version = snpeff_version.decode('ascii')
        except Exception as e:
            # Best effort: an unreadable or non-gzip file just means the
            # version is unknown.
            log.debug("Could not read SnpEff version from %s: %s", path, e)
        return snpeff_version

    def set_meta( self, dataset, **kwd ):
        """Populate the SnpEff metadata and rewrite the dataset summary file.

        Walks ``dataset.extra_files_path``, records the genome version,
        SnpEff version, regulation names and annotation names on
        ``dataset.metadata``, then writes a short text summary of them to
        ``dataset.file_name``. Failure to write the summary is logged and
        otherwise ignored.

        :param dataset: the dataset whose metadata is being set.
        :param kwd: passed through to ``Text.set_meta``.
        """
        Text.set_meta(self, dataset, **kwd )
        data_dir = dataset.extra_files_path
        ## search data_dir/genome_version for files
        regulation_pattern = r'regulation_(.+)\.bin'
        # annotation files that are included in snpEff by a flag
        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
        regulations = []
        annotations = []
        genome_version = None
        snpeff_version = None
        # Metadata and the summary file are only touched when the extra
        # files directory actually exists.
        if data_dir and os.path.isdir(data_dir):
            for root, dirs, files in os.walk(data_dir):
                for fname in files:
                    if fname.startswith('snpEffectPredictor'):
                        # if snpEffectPredictor.bin download succeeded
                        genome_version = os.path.basename(root)
                        dataset.metadata.genome_version = genome_version
                        # read the first line of the gzipped snpEffectPredictor.bin file to get the SnpEff version
                        snpeff_version = self.getSnpeffVersionFromFile(os.path.join(root,fname))
                        if snpeff_version:
                            dataset.metadata.snpeff_version = snpeff_version
                    else:
                        m = re.match(regulation_pattern,fname)
                        if m:
                            regulations.append(m.group(1))
                        elif fname in annotations_dict:
                            # The stored value is the snpEff flag; drop the
                            # leading dash to get the annotation name.
                            annotations.append(annotations_dict[fname].lstrip('-'))
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
            try:
                with open(dataset.file_name,'w') as fh:
                    # Parenthesised so the fallback text also ends with a newline.
                    fh.write("%s\n" % (genome_version or 'Genome unknown'))
                    fh.write("%s\n" % (snpeff_version or 'SnpEff version unknown'))
                    if annotations:
                        fh.write("annotations: %s\n" % ','.join(annotations))
                    if regulations:
                        fh.write("regulations: %s\n" % ','.join(regulations))
            except Exception as e:
                # Best effort: the summary is informational only.
                log.warning("Could not write SnpEff summary to %s: %s", dataset.file_name, e)
80 |