Mercurial > repos > saketkc > chasm_web
comparison chasm_webservice/chasm_webservice.py @ 5:b9de63c72559 draft
Uploaded
author | saketkc |
---|---|
date | Mon, 14 Apr 2014 19:16:11 -0400 |
parents | |
children | d88b17f16b14 |
comparison
equal
deleted
inserted
replaced
4:8aceb7fc57f5 | 5:b9de63c72559 |
---|---|
1 #!/usr/bin/python | |
2 """ | |
3 The MIT License (MIT) | |
4 | |
5 Copyright (c) 2014 Saket Choudhary, <saketkc@gmail.com> | |
6 | |
7 Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 of this software and associated documentation files (the 'Software'), to deal | |
9 in the Software without restriction, including without limitation the rights | |
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 copies of the Software, and to permit persons to whom the Software is | |
12 furnished to do so, subject to the following conditions: | |
13 | |
14 The above copyright notice and this permission notice shall be included in | |
15 all copies or substantial portions of the Software. | |
16 | |
17 THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 THE SOFTWARE. | |
24 | |
25 """ | |
26 import sys | |
27 import requests | |
28 import argparse | |
29 import time | |
30 from functools import wraps | |
31 import simplejson as json | |
32 import zipfile | |
33 import tempfile | |
34 import ntpath | |
35 import shutil | |
36 import xlrd | |
37 import csv | |
38 import os | |
# Zero-based index -> CSV file name (presumably the position of each sheet
# in a results workbook; confirm against the code that consumes this map).
sheet_map = dict(enumerate((
    'Variant_Analysis.csv',
    'Amino_Acid_Level_Analysis.csv',
    'Gene_Level_Analysis.csv',
)))
41 | |
42 | |
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    '''Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up;
        the function is always called at least once
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator; the decorated function returns whatever the
        wrapped function returns on its first successful attempt
    :raises: whatever the wrapped function raises on its final attempt, and
        immediately any exception that is not an ``ExceptionToCheck``
    '''
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # The first ``tries - 1`` attempts swallow ExceptionToCheck and
            # sleep before trying again.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    msg = 'Retrying in %d seconds...' % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call form prints identically on
                        # Python 2 and Python 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt is made outside the try block so that its
            # exception propagates to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
85 CANCERTYPES = ['Bladder', 'Blood-Lymphocyte', 'Blood-Myeloid', | |
86 'Brain-Cerebellum', 'Brain-Glioblastoma_Multiforme', | |
87 'Brain-Lower_Grade_Glioma', 'Breast', 'Cervix', | |
88 'Colon', 'Head_and_Neck', 'Kidney-Chromophobe', | |
89 'Kidney-Clear_Cell', 'Kidney-Papiallary_Cell', | |
90 'Liver-Nonviral', 'Liver-Viral', 'Lung-Adenocarcinoma', | |
91 'Lung-Squamous_Cell', 'Melanoma', 'Other', 'Ovary', | |
92 'Pancreas', 'Prostate-Adenocarcinoma', 'Rectum', | |
93 'Skin', 'Stomach', 'Thyroid', 'Uterus'] | |
94 | |
95 __URL__ = 'http://www.cravat.us/rest/service/submit' | |
96 | |
97 | |
def stop_err(msg):
    '''Write ``msg`` to stderr and terminate the script with a failure status.

    :param msg: text to report; a newline is appended
    :raises SystemExit: always, with exit code 1 so that callers and shells
        can tell the run failed (a bare ``sys.exit()`` would report success)
    '''
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
101 | |
102 | |
class CHASMWeb:
    '''Client for submitting a job to the web service at www.cravat.us and
    collecting the zipped results.

    Typical use: ``make_request()`` -> ``zip_exists(job_id)`` ->
    ``download_zip(url, job_id)`` -> ``move_files(file_map)``.
    '''

    def __init__(self,
                 mutationbox=None, filepath=None,
                 is_hg_18=None, analysis_type=None,
                 analysis_program=None, chosendb=None,
                 cancer_type=None, email=None,
                 annotate_genes=None, text_reports=None,
                 mupit_out=None):
        '''Store the submission options.

        :param mutationbox: mutations given inline as text; when empty the
            file at ``filepath`` is uploaded instead
        :param filepath: path of the input file to upload
        :param is_hg_18: sent as the 'hg18' form field
        :param analysis_type: sent as the 'analysistype' form field
        :param analysis_program: sent as the 'analysisitem' form field
        :param chosendb: stored on the instance; not used by any method here
        :param cancer_type: sent as the 'chasmclassifier' form field
        :param email: sent as the 'email' form field
        :param annotate_genes: sent as the 'geneannotation' form field
        :param text_reports: stored on the instance; 'tsvreport' is always
            submitted as 'on' regardless of this value
        :param mupit_out: sent as the 'mupitinput' form field
        '''
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out

    def make_request(self):
        '''Submit the job and return the job id reported by the server.

        :returns: the value of the 'jobid' key in the JSON response
        :raises KeyError: if the response JSON has no 'jobid' key
        '''
        data = {
            # The field name previously carried a stray trailing space
            # ('mutations '), so the inline mutations were posted under a
            # different field name than intended.
            'mutations': self.mutationbox,
            'hg18': self.is_hg_18,
            'analysistype': self.analysis_type,
            'analysisitem': self.analysis_program,
            'chasmclassifier': self.cancer_type,
            'geneannotation': self.annotate_genes,
            'email': self.email,
            'tsvreport': 'on',  # self.text_reports,
            'mupitinput': self.mupit_input,
        }
        stripped_data = {}

        # Checkbox-style fields: True becomes 'on'; None/False are omitted.
        for key, value in data.items():
            if value is True:
                value = 'on'
            if value is not None and value is not False:
                stripped_data[key] = value

        if not self.mutationbox:
            # Keep the upload handle open only for the duration of the POST.
            with open(self.filepath, 'rb') as input_file:
                request = requests.post(
                    __URL__, data=stripped_data,
                    files={'inputfile': input_file})
        else:
            # NOTE(review): the dummy 'foo' part presumably forces a
            # multipart/form-data body even without a file - confirm.
            request = requests.post(
                __URL__, data=stripped_data, files=dict(foo='bar'))
        job_id = json.loads(request.text)['jobid']
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        '''Return the results URL for ``job_id`` once it stops returning 404.

        Retried through the ``retry`` decorator while the server answers 404.

        :raises requests.HTTPError: if the URL still returns 404 on the
            final attempt
        '''
        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
        zip_download_request = requests.request('GET', url)
        if zip_download_request.status_code == 404:
            raise requests.HTTPError()
        else:
            return url

    def download_zip(self, url, job_id):
        '''Download ``url`` into a fresh temporary directory.

        Sets ``self.tmp_dir`` and ``self.path``.

        :returns: path of the downloaded zip, or None when the server did not
            answer with status 200
        '''
        self.tmp_dir = tempfile.mkdtemp()
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            self.path = os.path.join(self.tmp_dir, job_id + '.zip')
            with open(self.path, 'wb') as f:
                for chunk in r.iter_content(128):
                    f.write(chunk)
        else:
            # Nothing was stored, so do not leave the empty directory behind.
            shutil.rmtree(self.tmp_dir, ignore_errors=True)
            self.path = None
        return self.path

    def move_files(self, file_map):
        '''Copy the members of the downloaded zip to their destinations.

        Must be called after a successful ``download_zip``. The temporary
        download directory is removed afterwards, even on failure.

        :param file_map: maps a member's base file name to the path it should
            be written to; must contain 'error.txt', which receives every
            '.txt' member. Members with no entry, or whose entry is None,
            are skipped.
        '''
        fh = open(self.path, 'rb')
        try:
            zip_files = zipfile.ZipFile(fh)
            try:
                for name in zip_files.namelist():
                    filename = ntpath.basename(name)
                    extension = ntpath.splitext(filename)[-1]
                    if extension == '.txt':
                        target_path = file_map['error.txt']
                    elif extension != '.xls':
                        # .get() so unexpected members (including directory
                        # entries) are skipped instead of raising KeyError.
                        target_path = file_map.get(filename)
                    else:
                        target_path = None
                    if target_path:
                        source_file = zip_files.open(name)
                        try:
                            with open(target_path, 'wb') as target_file:
                                shutil.copyfileobj(source_file, target_file)
                        finally:
                            source_file.close()
                    if filename == 'SnvGet Feature Description.xls':
                        # xlrd cannot read from a zip member handle; hand it
                        # the raw bytes instead.
                        wb = xlrd.open_workbook(
                            file_contents=zip_files.read(name))
                        for sheet_name in wb.sheet_names():
                            sh = wb.sheet_by_name(sheet_name)
                            # NOTE(review): the original called
                            # replace(' ') with one argument (a TypeError);
                            # '_' is assumed as the replacement - confirm.
                            name_shortened = (
                                sheet_name.strip().replace(' ', '_') + '.csv')
                            # Written to the current working directory.
                            # 'wb' is the Python 2 csv convention.
                            with open(name_shortened, 'wb') as f:
                                c = csv.writer(f)
                                for r in range(sh.nrows):
                                    c.writerow(sh.row_values(r))
            finally:
                zip_files.close()
        finally:
            fh.close()
            shutil.rmtree(self.tmp_dir)
205 | |
206 | |
def main(params):
    '''Command line entry point: submit a job, wait for it, store results.

    :param params: list of command line arguments (without the program name)
    :raises SystemExit: on invalid arguments (argparse), or via ``stop_err``
        when the results never become available or cannot be downloaded
    '''

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        type=str, dest='mutationbox',
                        help='Input variants')
    parser.add_argument('--path', type=str,
                        dest='input_file_location',
                        help='Input file location')
    parser.add_argument('--hg18', dest='hg18',
                        action='store_true')
    parser.add_argument('--analysis_type', dest='analysis_type',
                        type=str,
                        choices=['driver', 'functional',
                                 'geneannotationonly'],
                        default='driver')
    # NOTE(review): nargs='*' yields a list when the option is given, but the
    # default is a plain string; the value is only stored on CHASMWeb and is
    # not submitted by the visible code - confirm intent.
    parser.add_argument('--chosendb', dest='chosendb',
                        type=str, nargs='*',
                        choices=['CHASM', 'SnvGet'],
                        default='CHASM')
    parser.add_argument('--cancertype', dest='cancer_type',
                        type=str, choices=CANCERTYPES,
                        required=True)
    parser.add_argument('--email', dest='email',
                        required=True, type=str)
    parser.add_argument('--annotate', dest='annotate',
                        action='store_true', default=None)
    parser.add_argument('--tsv_report', dest='tsv_report',
                        action='store_true', default=None)
    parser.add_argument('--mupit_out', dest='mupit_out',
                        action='store_true', default=None)
    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
                        type=str, required=True)
    parser.add_argument('--variant_analysis_out',
                        dest='variant_analysis_out',
                        type=str, required=True)
    parser.add_argument('--amino_acid_level_analysis_out',
                        dest='amino_acid_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--error_file', dest='error_file_out',
                        type=str, required=True)
    parser.add_argument('--snv_box_out', dest='snv_box_out',
                        type=str, required=False)
    parser.add_argument('--snv_features', dest='snv_features_out',
                        type=str, required=False)
    args = parser.parse_args(params)

    # Without inline variants the client uploads the file at --path, so at
    # least one of the two must be supplied; fail early with a usage error
    # instead of crashing later while opening a missing path.
    if not args.mutationbox and not args.input_file_location:
        parser.error('one of -i/--input or --path is required')

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Base name of each member of the results zip -> destination path.
    file_map = {'Amino_Acid_Level_Analysis.Result.tsv': args.amino_acid_level_analysis_out,
                'SNVBox.tsv': args.snv_box_out,
                'Variant_Analysis.Result.tsv': args.variant_analysis_out,
                'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
                'SnvGet Feature Description.xls': args.snv_features_out,
                'error.txt': args.error_file_out
                }
    try:
        # zip_exists raises HTTPError once its retries are exhausted.
        url = chasm_web.zip_exists(job_id)
    except requests.exceptions.HTTPError:
        stop_err('Results for job %s did not become available on the server'
                 % job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err('Unable to download from the server')
277 | |
# Run the command line interface only when executed as a script, passing
# every argument after the program name.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)