Mercurial > repos > jjohnson > snpeff_datatypes
annotate snpeff.py @ 4:5d6f3622b99d default tip
Default to snpeff_version: SnpEff4.0
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Wed, 14 Jan 2015 11:53:36 -0600 |
| parents | 9efd0d32fe8a |
| children |
| rev | line source |
|---|---|
| 0 | 1 """ |
| 2 SnpEff datatypes | |
| 3 """ | |
| 4 import os,os.path,re,sys,gzip,logging | |
| 5 import galaxy.datatypes.data | |
| 6 from galaxy.datatypes.data import Text | |
| 7 from galaxy.datatypes.metadata import MetadataElement | |
| 8 | |
| 9 log = logging.getLogger(__name__) | |
| 10 | |
class SnpEffDb( Text ):
    """Class describing a SnpEff genome build"""
    file_ext = "snpeffdb"
    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

    def __init__( self, **kwd ):
        Text.__init__( self, **kwd )

    def getSnpeffVersionFromFile(self, path):
        """
        Read the SnpEff version from the start of a gzipped file.

        The first 100 bytes of the decompressed stream are read and the first
        line is matched against ``SnpEff <major>.<minor>``.

        :param path: path of the gzipped file to inspect
        :returns: the version with the whitespace removed (e.g. ``SnpEff4.1``),
                  or None when the file cannot be read or the first line
                  does not match
        """
        snpeff_version = None
        try:
            fh = gzip.open(path, 'rb')
            try:
                buf = fh.read(100)
            finally:
                # Release the handle even when the read fails.
                fh.close()
            # On Python 3 the gzip stream yields bytes; decode so that the
            # str pattern below can be applied. On Python 2 buf is already str.
            if not isinstance(buf, str):
                buf = buf.decode('utf-8', 'replace')
            lines = buf.splitlines()
            if lines:
                m = re.match(r'^(SnpEff)\s+(\d+\.\d+).*$', lines[0].strip())
                if m:
                    snpeff_version = m.group(1) + m.group(2)
        except Exception as e:
            # Best effort: an unreadable or non-gzip file just means that the
            # version is unknown, but leave a trace for debugging.
            log.debug("Unable to read SnpEff version from %s: %s", path, e)
        return snpeff_version

    def set_meta( self, dataset, **kwd ):
        """
        Set the dataset metadata from the files under dataset.extra_files_path.

        Walks the extra files directory looking for:

        * files whose name starts with ``snpEffectPredictor``: the name of the
          containing directory becomes ``genome_version`` and the file is
          inspected for the ``snpeff_version``
        * ``regulation_<name>.bin`` files: ``<name>`` is added to ``regulation``
        * ``nextProt.bin`` / ``motif.bin``: ``nextprot`` / ``motif`` is added to
          ``annotation``

        A summary of what was found is then written into dataset.file_name.
        """
        Text.set_meta(self, dataset, **kwd )
        data_dir = dataset.extra_files_path
        ## search data_dir/genome_version for files
        # The dot before "bin" is escaped so that only a literal ".bin" matches.
        regulation_pattern = r'regulation_(.+)\.bin'
        # annotation files that are included in snpEff by a flag
        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
        regulations = []
        annotations = []
        genome_version = None
        snpeff_version = None
        if data_dir and os.path.isdir(data_dir):
            for root, dirs, files in os.walk(data_dir):
                for fname in files:
                    if fname.startswith('snpEffectPredictor'):
                        # if snpEffectPredictor.bin download succeeded
                        genome_version = os.path.basename(root)
                        dataset.metadata.genome_version = genome_version
                        # read the first line of the gzipped snpEffectPredictor.bin file to get the SnpEff version
                        # Starting with version 4.1, this should be: SnpEff 4.1
                        snpeff_version = self.getSnpeffVersionFromFile(os.path.join(root,fname))
                        if snpeff_version:
                            dataset.metadata.snpeff_version = snpeff_version
                    else:
                        m = re.match(regulation_pattern,fname)
                        if m:
                            regulations.append(m.group(1))
                        elif fname in annotations_dict:
                            # The metadata stores the flag without its leading dash.
                            annotations.append(annotations_dict[fname].lstrip('-'))
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
            try:
                with open(dataset.file_name,'w') as fh:
                    # The fallback is chosen before formatting so that every
                    # line, including the "unknown" ones, ends with a newline.
                    fh.write("%s\n" % (genome_version or 'Genome unknown'))
                    fh.write("%s\n" % (snpeff_version or 'SnpEff version unknown'))
                    if annotations:
                        fh.write("annotations: %s\n" % ','.join(annotations))
                    if regulations:
                        fh.write("regulations: %s\n" % ','.join(regulations))
            except Exception as e:
                # Writing the summary is best effort; the metadata is already set.
                log.warning("Unable to write SnpEff metadata summary to dataset file: %s", e)
| 0 | 81 |
