annotate snpeff.py @ 4:5d6f3622b99d default tip

Default to snpeff_version: SnpEff4.0
author Jim Johnson <jj@umn.edu>
date Wed, 14 Jan 2015 11:53:36 -0600
parents 9efd0d32fe8a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
1 """
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
2 SnpEff datatypes
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
3 """
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
4 import os,os.path,re,sys,gzip,logging
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
5 import galaxy.datatypes.data
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
6 from galaxy.datatypes.data import Text
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
7 from galaxy.datatypes.metadata import MetadataElement
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
8
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
9 log = logging.getLogger(__name__)
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
10
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
class SnpEffDb(Text):
    """Class describing a SnpEff genome build.

    The dataset's extra files directory is scanned in ``set_meta`` to fill in
    the ``genome_version``, ``snpeff_version``, ``regulation`` and
    ``annotation`` metadata elements declared below.
    """
    file_ext = "snpeffdb"
    MetadataElement(name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None)
    MetadataElement(name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None)
    MetadataElement(name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement(name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__(self, **kwd):
        Text.__init__(self, **kwd)

    def getSnpeffVersionFromFile(self, path):
        """Return the SnpEff version recorded in a gzipped predictor file.

        Reads the first 100 decompressed bytes of ``path`` and matches the
        first line against ``SnpEff <major>.<minor>``.

        Returns a string such as ``"SnpEff4.1"``, or ``None`` when the file
        cannot be read as gzip or its first line does not match.
        """
        snpeff_version = None
        try:
            fh = gzip.open(path, 'rb')
            try:
                buf = fh.read(100)
            finally:
                # Always release the handle, even when decompression fails.
                fh.close()
            lines = buf.splitlines()
            if lines:
                # The handle is binary: decode so that the text pattern below
                # works on both Python 2 (str) and Python 3 (bytes).
                first_line = lines[0].decode('ascii', 'replace').strip()
                m = re.match(r'^(SnpEff)\s+(\d+\.\d+).*$', first_line)
                if m:
                    # str() keeps a native string on Python 2; the matched
                    # text is pure ASCII so the conversion cannot fail.
                    snpeff_version = str(m.group(1) + m.group(2))
        except Exception as e:
            # Best effort: an unreadable or non-gzip file simply means the
            # version is unknown, but leave a trace for debugging.
            log.debug("Could not read SnpEff version from %s: %s", path, e)
        return snpeff_version

    def set_meta(self, dataset, **kwd):
        """Populate metadata from the files under ``dataset.extra_files_path``.

        Walks the directory tree: a file whose name starts with
        ``snpEffectPredictor`` sets ``genome_version`` (the name of its
        containing directory) and, when detectable, ``snpeff_version``;
        ``regulation_<name>.bin`` files and the known annotation files are
        collected into the ``regulation`` and ``annotation`` lists.  A short
        summary is then written into the dataset file itself.
        """
        Text.set_meta(self, dataset, **kwd)
        data_dir = dataset.extra_files_path
        # search data_dir/genome_version for files
        regulation_pattern = r'regulation_(.+)\.bin'
        # annotation files that are included in snpEff by a flag
        annotations_dict = {'nextProt.bin': '-nextprot', 'motif.bin': '-motif'}
        regulations = []
        annotations = []
        genome_version = None
        snpeff_version = None
        # NOTE(review): the nesting of the metadata assignment and summary
        # write inside this ``if`` is reconstructed -- confirm against the
        # repository copy of this file.
        if data_dir and os.path.isdir(data_dir):
            for root, _dirs, files in os.walk(data_dir):
                for fname in files:
                    if fname.startswith('snpEffectPredictor'):
                        # if snpEffectPredictor.bin download succeeded
                        genome_version = os.path.basename(root)
                        dataset.metadata.genome_version = genome_version
                        # read the first line of the gzipped snpEffectPredictor.bin file to get the SnpEff version
                        # Starting with version 4.1, this should be: SnpEff 4.1
                        snpeff_version = self.getSnpeffVersionFromFile(os.path.join(root, fname))
                        if snpeff_version:
                            dataset.metadata.snpeff_version = snpeff_version
                    else:
                        m = re.match(regulation_pattern, fname)
                        if m:
                            regulations.append(m.group(1))
                        elif fname in annotations_dict:
                            # Store the flag name without its leading dash.
                            annotations.append(annotations_dict[fname].lstrip('-'))
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
            try:
                with open(dataset.file_name, 'w') as fh:
                    # Parenthesise the conditional: '%' binds tighter than
                    # 'if/else', so without this the fallback text would be
                    # written without its trailing newline.
                    fh.write("%s\n" % (genome_version if genome_version else 'Genome unknown'))
                    fh.write("%s\n" % (snpeff_version if snpeff_version else 'SnpEff version unknown'))
                    if annotations:
                        fh.write("annotations: %s\n" % ','.join(annotations))
                    if regulations:
                        fh.write("regulations: %s\n" % ','.join(regulations))
            except Exception as e:
                # Writing the summary is best effort; metadata is already set.
                log.warning("Could not write SnpEff metadata summary to %s: %s", dataset.file_name, e)
0
926c8f02b3ba Uploaded
jjohnson
parents:
diff changeset
81