Mercurial > repos > cschu > candisnp
annotate candisnp.py @ 5:9215ffe7d4d5 draft
Fixed issue with INDEL annotations. Changed output to iframe.
author | cschu |
---|---|
date | Wed, 24 Jun 2015 11:42:40 -0400 |
parents | 36f6520671b3 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 import os | |
4 import sys | |
5 import argparse | |
6 import subprocess | |
7 import struct | |
8 import shutil | |
9 import tempfile | |
10 | |
11 import urllib2 | |
12 import csv | |
13 import json | |
14 | |
15 | |
# Base URL of the CandiSNP web service. main() appends ':8080' when posting
# data and uses the bare URL for the "try it manually" fallback link.
CANDISNP_SERVER = 'http://candisnp.tsl.ac.uk'

# Minimal example payload in the shape that main() sends to the server:
# a genome reference name plus a list of per-SNP records with the same keys
# that SNPEffect.toDict() produces.
CANDI_TESTDATA = {
    "ref": "athalianaTair10",
    "data": [
        {
            "is_synonymous": "FALSE",
            "change": "E/K",
            "effect": "NON_SYNONYMOUS_CODING",
            "gene": "AT1G01320",
            "allele_freq": 0.291666667,
            "reference_base": "C",
            "position": 126483,
            "chromosome": "1",
            "is_ctga": "TRUE",
            "in_cds": "TRUE",
            "alternate_base": "T",
        },
    ],
}
# Leftover note from the original source (it was a bare, no-op string literal):
#   'is_ctga', 'effect', 'change', 'gene'

# Column index -> field name for the tab-separated columns of a VCF data line
# that this script reads (the same indices AnnotatedSNPEffectFactory uses).
# Not referenced elsewhere in the visible source.
SNP_DATA_HEADERS = {
    0: 'chromosome',
    1: 'position',
    3: 'reference_base',
    4: 'alternate_base',
    7: 'info',
}

# HTTP headers sent with the request to the CandiSNP server: the body is JSON,
# and a plain-text response is requested.
contentHeaders = {
    'Content-Type': 'application/json',
    'Accept': 'text/plain',
}
56 | |
class SNPEffect(object):
    """A single effect annotation parsed from one snpEff ``EFF`` entry.

    The constructor expects a string of the form
    ``EFFECT(f1|f2|...|f11[|f12])`` and exposes the pipe-separated fields as
    attributes.  Positional attributes (``chromosome``, ``position``,
    ``reference_base``, ``alternate_base``, ``allele_frequency``) are NOT set
    here; the caller must assign them before calling :meth:`toDict`.
    """

    def __init__(self, string):
        """Parse ``string`` into effect name and effect fields.

        Raises:
            IndexError: if ``string`` contains no ``(``.
            AssertionError: if fewer than 11 pipe-separated fields are found.
            ValueError: if the exon/intron rank (when non-empty) or the
                genotype number is not an integer.
        """
        effectAndData = string.strip(')').split('(')
        self.effect, effectData = effectAndData[0], effectAndData[1]
        effectData = effectData.split('|')
        assert len(effectData) >= 11, 'Invalid effect field %s' % string

        # Pad with one empty string so the optional 12th field (warning)
        # always unpacks; anything beyond 12 fields is ignored.
        (self.impact, self.fClass, changeDist, self.aaChange, self.aaLength,
         self.gene, self.transcriptBioType, isCoding, self.transcriptID,
         exonIntronRank, genotypeNumber, warning) = (effectData + [''])[:12]

        # The third field is interpreted as a distance for UPSTREAM/DOWNSTREAM
        # effects and as a codon change for everything else.
        if self.effect in ('UPSTREAM', 'DOWNSTREAM'):
            self.transcriptDist = changeDist
            self.codonChange = None
        else:
            self.transcriptDist = None
            self.codonChange = changeDist

        self.isCoding = (isCoding == 'CODING')
        self.exonIntronRank = int(exonIntronRank) if exonIntronRank else 'NA'
        self.genotypeNumber = int(genotypeNumber)
        self.warning = warning if warning else None

        # Everything except NON_SYNONYMOUS_CODING counts as synonymous, but
        # INTRON/INTERGENIC effects are flagged as neither synonymous nor
        # inside a coding sequence.
        self.is_synonymous = self.effect != 'NON_SYNONYMOUS_CODING'
        if self.effect in ('INTRON', 'INTERGENIC'):
            self.in_cds, self.is_synonymous = False, False
        else:
            self.in_cds = True

        # First and last character of the amino-acid change, e.g.
        # 'E123K' -> 'E/K'.  An empty field yields an empty change (the
        # original relied on a bare ``except`` to handle this case).
        if self.aaChange:
            self.change = '%s/%s' % (self.aaChange[0], self.aaChange[-1])
        else:
            self.change = ''

    def toDict(self):
        """Return the effect as the flat record sent to the CandiSNP server.

        Booleans are encoded as the strings ``'TRUE'``/``'FALSE'``; empty
        ``change``, ``effect`` and ``gene`` values become ``'NA'``.

        Raises:
            AssertionError: if ``allele_frequency``, ``reference_base`` or
                ``alternate_base`` has not been assigned by the caller.
            AttributeError: if ``position`` or ``chromosome`` is missing.
        """
        for attr in ('is_synonymous', 'allele_frequency',
                     'reference_base', 'alternate_base'):
            assert hasattr(self, attr), 'Missing attribute: %s' % attr

        # True for C->T and G->A substitutions only.
        is_ctga = ((self.reference_base, self.alternate_base)
                   in (('C', 'T'), ('G', 'A')))

        return {
            "is_synonymous": str(self.is_synonymous).upper(),
            "change": self.change if self.change else 'NA',
            "effect": self.effect if self.effect else 'NA',
            "gene": self.gene if self.gene else 'NA',
            "allele_freq": self.allele_frequency,
            "reference_base": self.reference_base,
            "position": self.position,
            "chromosome": self.chromosome,
            "is_ctga": str(is_ctga).upper(),
            "in_cds": str(self.in_cds).upper(),
            "alternate_base": self.alternate_base,
        }
109 | |
110 | |
111 | |
class AnnotatedSNPEffectFactory(object):
    """Builds per-effect records for one data line of a snpEff-annotated VCF.

    Positional arguments are the tab-separated columns of the line; columns
    0, 1, 3, 4 and 7 (chromosome, position, reference base, alternate base,
    INFO) are used.
    """

    def __init__(self, *args, **kwargs):
        """Store the positional columns and parse the INFO column.

        Keyword Args:
            log: optional file-like object kept on ``self.log``; it is not
                written to by the visible code.

        Raises:
            AssertionError: if fewer than 8 columns are supplied.
            ValueError: if the position or the ``AF`` value is not numeric.
        """
        # Column index 7 (INFO) is read below, so 8 columns are required.
        # ``(args,)`` keeps the message formatting from failing on the tuple.
        assert len(args) >= 8, 'Not enough values in args (%s)' % (args,)
        self.log = kwargs.get('log')
        self.chromosome = args[0]
        self.position = int(args[1])
        self.reference_base = args[3]
        self.alternate_base = args[4]

        # Only ``key=value`` fields with exactly one '=' are kept; flag-style
        # fields (e.g. a bare ``INDEL``) are skipped.
        info = {}
        for field in args[7].strip().split(';'):
            parts = field.split('=')
            if len(parts) == 2:
                info[parts[0]] = parts[1]

        self.allele_frequency = float(info.get('AF', '0.0'))
        # ``EFF`` holds a comma-separated list of effect strings; empty
        # entries are dropped.
        self.effects = [effect
                        for effect in info.get('EFF', '').split(',')
                        if effect]

    def getEffects(self, notWanted=('UPSTREAM', 'DOWNSTREAM', 'FRAME_SHIFT')):
        """Yield one ``SNPEffect.toDict()`` record per wanted effect.

        Effects whose name is listed in ``notWanted`` are skipped.  Each
        remaining effect is given this line's positional attributes before it
        is converted to a dict.
        """
        for effectString in self.effects:
            effect = SNPEffect(effectString)
            if effect.effect in notWanted:
                continue
            for attr in ('chromosome', 'position', 'reference_base',
                         'alternate_base', 'allele_frequency'):
                setattr(effect, attr, getattr(self, attr))
            yield effect.toDict()
144 | |
def extractCandiDataFromSnpEffVCF(snpEffVCF, fo):
    """Collect CandiSNP records from the lines of a snpEff-annotated VCF.

    Args:
        snpEffVCF: iterable of text lines (e.g. an open file).
        fo: object handed to ``AnnotatedSNPEffectFactory`` as its ``log``.

    Returns:
        A list with one dict per effect yielded by
        ``AnnotatedSNPEffectFactory.getEffects()`` over all data lines.
    """
    snps = []
    for line in snpEffVCF:
        # Skip header/comment lines, and blank lines (such as a trailing
        # newline), which would otherwise fail the factory's column check.
        if line.startswith('#') or not line.strip():
            continue
        asf = AnnotatedSNPEffectFactory(*line.strip().split('\t'), log=fo)
        snps.extend(asf.getEffects())
    return snps
155 | |
156 | |
157 | |
def main(argv):
    """Send snpEff annotations to CandiSNP and write an HTML result page.

    Args:
        argv: command-line arguments without the program name
            (``[--ref REF] snpEff_output candisnp_html``).  ``None`` makes
            argparse fall back to ``sys.argv[1:]``.

    The annotated VCF is converted to CandiSNP records and posted as JSON to
    ``CANDISNP_SERVER`` on port 8080.  If the server answers with a
    non-empty body, that body is used as the ``src`` URL of an iframe written
    to the output file; otherwise an apology with a manual link is written.
    """
    # Local import: only needed here, so the file's import block stays as is.
    from xml.sax.saxutils import escape

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--ref', help='The snpEff genome reference.')
    parser.add_argument('snpEff_output', type=str, help='The input file.')
    parser.add_argument('candisnp_html', type=str, help='The output file.')
    # Honour the argv parameter (previously ignored in favour of sys.argv).
    args = parser.parse_args(argv)

    # Context managers guarantee both files are closed, even on errors.
    with open(args.candisnp_html, 'wb') as fo:
        with open(args.snpEff_output) as snpEffVCF:
            candiData = extractCandiDataFromSnpEffVCF(snpEffVCF, fo)
        candiMessage = json.dumps({'ref': args.ref, 'data': candiData})

        request = urllib2.Request(CANDISNP_SERVER + ':8080',
                                  headers=contentHeaders,
                                  data=candiMessage)

        try:
            response = urllib2.urlopen(request)
            candiURL = response.read()
        except urllib2.URLError as e:
            candiURL = ''
            # Not every URLError subclass is guaranteed to carry ``reason``;
            # fall back to the exception itself.
            sys.stderr.write(str(getattr(e, 'reason', e)) + '\n')

        if candiURL:
            # The URL comes from a remote server: escape it before placing it
            # inside the double-quoted HTML attribute.
            safeURL = escape(candiURL, {'"': '&quot;'})
            fo.write('<iframe src="%s" frameborder="0" style="position: absolute; width: 100%%; height: 100%%;"></iframe>\n' % safeURL)
        else:
            fo.write('I am sorry. CandiSNP does not pick up. Maybe (<a href="%s" target="_blank">try it manually?</a>)\n' % CANDISNP_SERVER)
214 | |
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv[1:])