annotate candisnp.py @ 5:9215ffe7d4d5 draft

Fixed issue with INDEL annotations. Changed output to iframe.
author cschu
date Wed, 24 Jun 2015 11:42:40 -0400
parents 36f6520671b3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
36f6520671b3 Uploaded
cschu
parents:
diff changeset
1 #!/usr/bin/env python
36f6520671b3 Uploaded
cschu
parents:
diff changeset
2
36f6520671b3 Uploaded
cschu
parents:
diff changeset
3 import os
36f6520671b3 Uploaded
cschu
parents:
diff changeset
4 import sys
36f6520671b3 Uploaded
cschu
parents:
diff changeset
5 import argparse
36f6520671b3 Uploaded
cschu
parents:
diff changeset
6 import subprocess
36f6520671b3 Uploaded
cschu
parents:
diff changeset
7 import struct
36f6520671b3 Uploaded
cschu
parents:
diff changeset
8 import shutil
36f6520671b3 Uploaded
cschu
parents:
diff changeset
9 import tempfile
36f6520671b3 Uploaded
cschu
parents:
diff changeset
10
36f6520671b3 Uploaded
cschu
parents:
diff changeset
11 import urllib2
36f6520671b3 Uploaded
cschu
parents:
diff changeset
12 import csv
36f6520671b3 Uploaded
cschu
parents:
diff changeset
13 import json
36f6520671b3 Uploaded
cschu
parents:
diff changeset
14
36f6520671b3 Uploaded
cschu
parents:
diff changeset
15
36f6520671b3 Uploaded
cschu
parents:
diff changeset
16 CANDISNP_SERVER = 'http://candisnp.tsl.ac.uk' #:8080'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
17
36f6520671b3 Uploaded
cschu
parents:
diff changeset
18 CANDI_TESTDATA = {
36f6520671b3 Uploaded
cschu
parents:
diff changeset
19 "ref": "athalianaTair10",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
20 "data": [{
36f6520671b3 Uploaded
cschu
parents:
diff changeset
21 "is_synonymous": "FALSE",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
22 "change": "E/K",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
23 "effect": "NON_SYNONYMOUS_CODING",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
24 "gene": "AT1G01320",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
25 "allele_freq": 0.291666667,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
26 "reference_base": "C",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
27 "position": 126483,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
28 "chromosome": "1",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
29 "is_ctga": "TRUE",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
30 "in_cds": "TRUE",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
31 "alternate_base": "T",
36f6520671b3 Uploaded
cschu
parents:
diff changeset
32 },
36f6520671b3 Uploaded
cschu
parents:
diff changeset
33 ],
36f6520671b3 Uploaded
cschu
parents:
diff changeset
34 }
36f6520671b3 Uploaded
cschu
parents:
diff changeset
35 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
36 'is_ctga',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
37 'effect',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
38 'change',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
39 'gene',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
40
36f6520671b3 Uploaded
cschu
parents:
diff changeset
41 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
42
36f6520671b3 Uploaded
cschu
parents:
diff changeset
43
36f6520671b3 Uploaded
cschu
parents:
diff changeset
44 SNP_DATA_HEADERS = {
36f6520671b3 Uploaded
cschu
parents:
diff changeset
45 0: 'chromosome',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
46 1: 'position',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
47 3: 'reference_base',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
48 4: 'alternate_base',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
49 7: 'info'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
50 }
36f6520671b3 Uploaded
cschu
parents:
diff changeset
51
36f6520671b3 Uploaded
cschu
parents:
diff changeset
52 contentHeaders = {
36f6520671b3 Uploaded
cschu
parents:
diff changeset
53 'Content-Type': 'application/json',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
54 'Accept': 'text/plain'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
55 }
36f6520671b3 Uploaded
cschu
parents:
diff changeset
56
36f6520671b3 Uploaded
cschu
parents:
diff changeset
57 class SNPEffect(object):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
58 def __init__(self, string):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
59 effectAndData = string.strip(')').split('(')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
60 # sys.stderr.write(str(effectAndData)+'\n')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
61 self.effect, effectData = effectAndData[0], effectAndData[1]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
62 effectData = effectData.split('|')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
63 assert len(effectData) >= 11, 'Invalid effect field %s' % string
36f6520671b3 Uploaded
cschu
parents:
diff changeset
64 self.impact, self.fClass, changeDist, self.aaChange, self.aaLength, self.gene, self.transcriptBioType, isCoding, self.transcriptID, exonIntronRank, genotypeNumber, warning = (effectData + [''])[:12]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
65 if self.effect in ('UPSTREAM', 'DOWNSTREAM'):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
66 self.transcriptDist = changeDist
36f6520671b3 Uploaded
cschu
parents:
diff changeset
67 self.codonChange = None
36f6520671b3 Uploaded
cschu
parents:
diff changeset
68 else:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
69 self.transcriptDist = None
36f6520671b3 Uploaded
cschu
parents:
diff changeset
70 self.codonChange = changeDist
36f6520671b3 Uploaded
cschu
parents:
diff changeset
71 self.isCoding = (isCoding == 'CODING')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
72 self.exonIntronRank = int(exonIntronRank) if exonIntronRank else 'NA'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
73 self.genotypeNumber = int(genotypeNumber)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
74 self.warning = warning if warning else None
36f6520671b3 Uploaded
cschu
parents:
diff changeset
75
36f6520671b3 Uploaded
cschu
parents:
diff changeset
76 self.is_synonymous = not self.effect == 'NON_SYNONYMOUS_CODING'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
77 if self.effect in ('INTRON', 'INTERGENIC'):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
78 self.in_cds, self.is_synonymous = False, False
36f6520671b3 Uploaded
cschu
parents:
diff changeset
79 else:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
80 self.in_cds = True
36f6520671b3 Uploaded
cschu
parents:
diff changeset
81 try:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
82 self.change = '%s/%s' % (self.aaChange[0], self.aaChange[-1])
36f6520671b3 Uploaded
cschu
parents:
diff changeset
83 except:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
84 self.change = ''
36f6520671b3 Uploaded
cschu
parents:
diff changeset
85
36f6520671b3 Uploaded
cschu
parents:
diff changeset
86
36f6520671b3 Uploaded
cschu
parents:
diff changeset
87 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
88 def toDict(self):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
89 def is_ctga(ref, alt):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
90 return (ref == 'C' and alt == 'T') or (ref == 'G' and alt == 'A')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
91 assert hasattr(self, 'is_synonymous'), 'Missing attribute: is_synonymous'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
92 assert hasattr(self, 'allele_frequency'), 'Missing attribute: allele_frequency'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
93 assert hasattr(self, 'reference_base'), 'Missing attribute: reference_base'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
94 assert hasattr(self, 'alternate_base'), 'Missing attribute: alternate_base'
36f6520671b3 Uploaded
cschu
parents:
diff changeset
95 return { "is_synonymous": str(self.is_synonymous).upper(),
36f6520671b3 Uploaded
cschu
parents:
diff changeset
96 "change": self.change if self.change else 'NA',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
97 "effect": self.effect if self.effect else 'NA',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
98 "gene": self.gene if self.gene else 'NA',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
99 "allele_freq": self.allele_frequency,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
100 "reference_base": self.reference_base,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
101 "position": self.position,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
102 "chromosome": self.chromosome,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
103 "is_ctga": str(is_ctga(self.reference_base, self.alternate_base)).upper(),
36f6520671b3 Uploaded
cschu
parents:
diff changeset
104 "in_cds": str(self.in_cds).upper(),
36f6520671b3 Uploaded
cschu
parents:
diff changeset
105 "alternate_base": self.alternate_base, }
36f6520671b3 Uploaded
cschu
parents:
diff changeset
106
36f6520671b3 Uploaded
cschu
parents:
diff changeset
107
36f6520671b3 Uploaded
cschu
parents:
diff changeset
108 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
109
36f6520671b3 Uploaded
cschu
parents:
diff changeset
110
36f6520671b3 Uploaded
cschu
parents:
diff changeset
111
36f6520671b3 Uploaded
cschu
parents:
diff changeset
112 class AnnotatedSNPEffectFactory(object):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
113 def __init__(self, *args, **kwargs):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
114 assert len(args) >= 7, 'Not enough values in args (%s)' % args
36f6520671b3 Uploaded
cschu
parents:
diff changeset
115 self.log = kwargs['log']
36f6520671b3 Uploaded
cschu
parents:
diff changeset
116 self.chromosome = args[0]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
117 self.position = int(args[1])
36f6520671b3 Uploaded
cschu
parents:
diff changeset
118 self.reference_base = args[3]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
119 self.alternate_base = args[4]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
120
36f6520671b3 Uploaded
cschu
parents:
diff changeset
121 # info = dict([field.split('=') for field in args[7].strip().split(';')])
5
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
122 # info = dict(field.split('=') for field in args[7].strip().split(';'))
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
123 info = dict(field.split('=') for field in args[7].strip().split(';') if len(field.split('=')) == 2)
0
36f6520671b3 Uploaded
cschu
parents:
diff changeset
124 # self.log.write('***'+str(info.get('EFF', '@@@')+'***\n'))
36f6520671b3 Uploaded
cschu
parents:
diff changeset
125
36f6520671b3 Uploaded
cschu
parents:
diff changeset
126 self.allele_frequency = float(info.get('AF', '0.0'))
36f6520671b3 Uploaded
cschu
parents:
diff changeset
127 self.effects = [effect
36f6520671b3 Uploaded
cschu
parents:
diff changeset
128 for effect in info.get('EFF', '').split(',')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
129 if effect]
36f6520671b3 Uploaded
cschu
parents:
diff changeset
130 # assert self.effects, 'No effects in info-string %s' % info.get('EF', '')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
131 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
132
36f6520671b3 Uploaded
cschu
parents:
diff changeset
133 def getEffects(self, notWanted=('UPSTREAM', 'DOWNSTREAM', 'FRAME_SHIFT')):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
134 for effectString in self.effects:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
135 effect = SNPEffect(effectString)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
136 if effect.effect not in notWanted:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
137 for attr in ('chromosome', 'position', 'reference_base', 'alternate_base', 'allele_frequency'):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
138 setattr(effect, attr, getattr(self, attr))
36f6520671b3 Uploaded
cschu
parents:
diff changeset
139 yield effect.toDict()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
140
36f6520671b3 Uploaded
cschu
parents:
diff changeset
141
36f6520671b3 Uploaded
cschu
parents:
diff changeset
142 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
143 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
144
36f6520671b3 Uploaded
cschu
parents:
diff changeset
145 def extractCandiDataFromSnpEffVCF(snpEffVCF, fo):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
146 snps = []
36f6520671b3 Uploaded
cschu
parents:
diff changeset
147 for line in snpEffVCF:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
148 if not line.startswith('#'):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
149 #fo.write(line)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
150 asf = AnnotatedSNPEffectFactory(*line.strip().split('\t'), log=fo)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
151 snpEffects = list(asf.getEffects())
36f6520671b3 Uploaded
cschu
parents:
diff changeset
152 #fo.write(str(snpEffects)+'\n')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
153 snps.extend(snpEffects)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
154 return snps
36f6520671b3 Uploaded
cschu
parents:
diff changeset
155
36f6520671b3 Uploaded
cschu
parents:
diff changeset
156
36f6520671b3 Uploaded
cschu
parents:
diff changeset
157
36f6520671b3 Uploaded
cschu
parents:
diff changeset
158 def main(argv):
36f6520671b3 Uploaded
cschu
parents:
diff changeset
159
36f6520671b3 Uploaded
cschu
parents:
diff changeset
160 descr = ''
36f6520671b3 Uploaded
cschu
parents:
diff changeset
161 parser = argparse.ArgumentParser(description=descr)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
162 parser.add_argument('--ref', help='The snpEff genome reference.')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
163 parser.add_argument('snpEff_output', type=str, help='The input file.')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
164 parser.add_argument('candisnp_html', type=str, help='The output file.')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
165 args = parser.parse_args()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
166
36f6520671b3 Uploaded
cschu
parents:
diff changeset
167 fo = open(args.candisnp_html, 'wb')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
168 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
169 import subprocess
36f6520671b3 Uploaded
cschu
parents:
diff changeset
170 import urllib2
36f6520671b3 Uploaded
cschu
parents:
diff changeset
171 response = urllib2.urlopen('http://ruup.xyz:8080/monitors')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
172 html = response.read()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
173
36f6520671b3 Uploaded
cschu
parents:
diff changeset
174
36f6520671b3 Uploaded
cschu
parents:
diff changeset
175 # p = subprocess.Popen(['wget http://ruup.xyz:8080/monitors'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
176 # fo.write(p.communicate()[0])
36f6520671b3 Uploaded
cschu
parents:
diff changeset
177 fo.write(html)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
178 fo.close()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
179
36f6520671b3 Uploaded
cschu
parents:
diff changeset
180 return None
36f6520671b3 Uploaded
cschu
parents:
diff changeset
181 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
182 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
183 # This works.
36f6520671b3 Uploaded
cschu
parents:
diff changeset
184 candiData = CANDI_TESTDATA
36f6520671b3 Uploaded
cschu
parents:
diff changeset
185 candiMessage = json.dumps(candiData)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
186 """
36f6520671b3 Uploaded
cschu
parents:
diff changeset
187 candiData = extractCandiDataFromSnpEffVCF(open(args.snpEff_output), fo)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
188 candiMessage = json.dumps({'ref': args.ref, 'data': candiData})
36f6520671b3 Uploaded
cschu
parents:
diff changeset
189
36f6520671b3 Uploaded
cschu
parents:
diff changeset
190 request = urllib2.Request(CANDISNP_SERVER + ':8080',
36f6520671b3 Uploaded
cschu
parents:
diff changeset
191 headers=contentHeaders,
36f6520671b3 Uploaded
cschu
parents:
diff changeset
192 data=candiMessage)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
193
36f6520671b3 Uploaded
cschu
parents:
diff changeset
194
36f6520671b3 Uploaded
cschu
parents:
diff changeset
195 try:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
196 response = urllib2.urlopen(request)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
197 candiURL = response.read()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
198 except urllib2.URLError, e:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
199 candiURL = ''
36f6520671b3 Uploaded
cschu
parents:
diff changeset
200 sys.stderr.write(str(e.reason) + '\n')
36f6520671b3 Uploaded
cschu
parents:
diff changeset
201
36f6520671b3 Uploaded
cschu
parents:
diff changeset
202 if candiURL:
5
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
203 # body = urllib2.urlopen(candiURL)
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
204 # fo.write(body.read())
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
205 #fo.write('<iframe name="galaxy_main" id="galaxy_main" frameborder="0" style="position: absolute; width: 100%; height: 100%;" src="%s"></iframe>' % candiURL)
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
206 #fo.write('<iframe name="galaxy_main" id="galaxy_main" frameborder="0" style="position: absolute; width: 100%; height: 100%;" src="%s"></iframe>' % candiURL)
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
207 fo.write('<iframe src="%s" frameborder="0" style="position: absolute; width: 100%%; height: 100%%;"></iframe>\n' % candiURL)
9215ffe7d4d5 Fixed issue with INDEL annotations. Changed output to iframe.
cschu
parents: 0
diff changeset
208 # fo.write('<iframe src="%s"></iframe>\n' % candiURL)
0
36f6520671b3 Uploaded
cschu
parents:
diff changeset
209 else:
36f6520671b3 Uploaded
cschu
parents:
diff changeset
210 fo.write('I am sorry. CandiSNP does not pick up. Maybe (<a href="%s" target="_blank">try it manually?</a>)\n' % CANDISNP_SERVER)
36f6520671b3 Uploaded
cschu
parents:
diff changeset
211
36f6520671b3 Uploaded
cschu
parents:
diff changeset
212 fo.close()
36f6520671b3 Uploaded
cschu
parents:
diff changeset
213 pass
36f6520671b3 Uploaded
cschu
parents:
diff changeset
214
36f6520671b3 Uploaded
cschu
parents:
diff changeset
215 # main(sys.argv[1:])
36f6520671b3 Uploaded
cschu
parents:
diff changeset
216
36f6520671b3 Uploaded
cschu
parents:
diff changeset
217 if __name__ == '__main__': main(sys.argv[1:])