comparison chasm_webservice/chasm_webservice.py @ 5:b9de63c72559 draft

Uploaded
author saketkc
date Mon, 14 Apr 2014 19:16:11 -0400
parents
children d88b17f16b14
comparison
equal deleted inserted replaced
4:8aceb7fc57f5 5:b9de63c72559
1 #!/usr/bin/python
2 """
3 The MIT License (MIT)
4
5 Copyright (c) 2014 Saket Choudhary, <saketkc@gmail.com>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the 'Software'), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 THE SOFTWARE.
24
25 """
26 import sys
27 import requests
28 import argparse
29 import time
30 from functools import wraps
31 import simplejson as json
32 import zipfile
33 import tempfile
34 import ntpath
35 import shutil
36 import xlrd
37 import csv
38 import os
# CSV file names keyed by a zero-based integer index.
# NOTE(review): presumably the index is a worksheet position in the SnvGet
# workbook; nothing in the visible code reads this mapping -- confirm usage.
sheet_map = dict(enumerate((
    'Variant_Analysis.csv',
    'Amino_Acid_Level_Analysis.csv',
    'Gene_Level_Analysis.csv',
)))
41
42
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    '''Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The wrapped function is called up to ``tries`` times in total. The first
    ``tries - 1`` attempts swallow ``ExceptionToCheck`` and sleep before the
    next attempt; the final attempt is made outside the ``try`` so that its
    exception (if any) propagates to the caller.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator that adds the retry behaviour to a function
    '''
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    # "except X as e" / "except X" is valid on Python 2.6+
                    # and 3; the caught exception itself is not reported.
                    msg = 'Retrying in %d seconds...' % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call form prints identically on
                        # Python 2 and Python 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: let any exception reach the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
# Values accepted by the --cancertype command-line option; the chosen value is
# sent to the service as the 'chasmclassifier' form field.
CANCERTYPES = [
    'Bladder',
    'Blood-Lymphocyte',
    'Blood-Myeloid',
    'Brain-Cerebellum',
    'Brain-Glioblastoma_Multiforme',
    'Brain-Lower_Grade_Glioma',
    'Breast',
    'Cervix',
    'Colon',
    'Head_and_Neck',
    'Kidney-Chromophobe',
    'Kidney-Clear_Cell',
    'Kidney-Papiallary_Cell',
    'Liver-Nonviral',
    'Liver-Viral',
    'Lung-Adenocarcinoma',
    'Lung-Squamous_Cell',
    'Melanoma',
    'Other',
    'Ovary',
    'Pancreas',
    'Prostate-Adenocarcinoma',
    'Rectum',
    'Skin',
    'Stomach',
    'Thyroid',
    'Uterus',
]

# Endpoint that job submissions are POSTed to.
__URL__ = 'http://www.cravat.us/rest/service/submit'
96
97
def stop_err(msg):
    """Write *msg* to stderr and terminate the process with a failure status.

    :param msg: text to report; a trailing newline is appended.
    :raises SystemExit: always, with exit code 1 so that callers of the
        script can detect the failure from the exit status as well as from
        the stderr output.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would report status 0 (success) after an error.
    sys.exit(1)
101
102
class CHASMWeb:
    """Small client for the CRAVAT web service at www.cravat.us.

    Typical use (see ``main``): build an instance from the command-line
    options, ``make_request()`` to submit the job, ``zip_exists()`` to wait
    for the result archive, ``download_zip()`` to fetch it into a temporary
    directory and ``move_files()`` to copy its members to the requested
    output paths.
    """

    def __init__(self,
                 mutationbox=None, filepath=None,
                 is_hg_18=None, analysis_type=None,
                 analysis_program=None, chosendb=None,
                 cancer_type=None, email=None,
                 annotate_genes=None, text_reports=None,
                 mupit_out=None):
        """Store the submission options.

        :param mutationbox: variants given inline as text; when empty the
            file at ``filepath`` is uploaded instead.
        :param filepath: path of the input file to upload.
        :param is_hg_18: sent as the 'hg18' form field.
        :param analysis_type: sent as the 'analysistype' form field.
        :param analysis_program: sent as the 'analysisitem' form field.
        :param chosendb: stored but not sent by ``make_request``.
        :param cancer_type: sent as the 'chasmclassifier' form field.
        :param email: sent as the 'email' form field.
        :param annotate_genes: sent as the 'geneannotation' form field.
        :param text_reports: stored; ``make_request`` always sends
            'tsvreport' as 'on' regardless of this value.
        :param mupit_out: sent as the 'mupitinput' form field.
        """
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        # Note the attribute name differs from the constructor argument.
        self.mupit_input = mupit_out
        # Set by download_zip(); defined here so move_files() never hits a
        # missing attribute.
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        """Submit the job and return the 'jobid' from the JSON response.

        Options that are ``None`` or ``False`` are omitted from the form;
        ``True`` is sent as the string 'on'.
        """
        data = {
            # Fixed: the key used to be 'mutations ' (trailing space).
            'mutations': self.mutationbox,
            'hg18': self.is_hg_18,
            'analysistype': self.analysis_type,
            'analysisitem': self.analysis_program,
            'chasmclassifier': self.cancer_type,
            'geneannotation': self.annotate_genes,
            'email': self.email,
            'tsvreport': 'on',  # self.text_reports,
            'mupitinput': self.mupit_input,
        }
        stripped_data = {}
        # .items() works on both Python 2 and 3 (iteritems is 2-only).
        for key, value in data.items():
            if value is None or value is False:
                continue
            stripped_data[key] = 'on' if value is True else value

        if not self.mutationbox:
            # Close the upload handle once the request has been sent.
            with open(self.filepath, 'rb') as input_file:
                request = requests.post(
                    __URL__, data=stripped_data,
                    files={'inputfile': input_file})
        else:
            # NOTE(review): the dummy 'foo' part presumably forces a
            # multipart/form-data body -- confirm the server requires it.
            request = requests.post(
                __URL__, data=stripped_data, files=dict(foo='bar'))
        job_id = json.loads(request.text)['jobid']
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        """Return the result-archive URL once it stops answering 404.

        A 404 response raises ``requests.HTTPError`` so that the ``retry``
        decorator sleeps and tries again; after the last attempt the
        exception propagates to the caller.
        """
        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
        zip_download_request = requests.request('GET', url)
        if zip_download_request.status_code == 404:
            raise requests.HTTPError()
        return url

    def download_zip(self, url, job_id):
        """Download *url* into a fresh temporary directory.

        :returns: path of the saved '<job_id>.zip', or ``None`` when the
            server did not answer with status 200 (the temporary directory
            is removed again in that case).
        """
        self.tmp_dir = tempfile.mkdtemp()
        response = requests.get(url, stream=True)
        if response.status_code != 200:
            # Nothing was saved; do not leave the empty directory behind.
            shutil.rmtree(self.tmp_dir, ignore_errors=True)
            self.tmp_dir = None
            self.path = None
            return None
        self.path = os.path.join(self.tmp_dir, job_id + '.zip')
        with open(self.path, 'wb') as zip_file:
            for chunk in response.iter_content(8192):
                zip_file.write(chunk)
        return self.path

    def move_files(self, file_map):
        """Copy members of the downloaded archive to their output paths.

        :param file_map: dict mapping an archive member's base name to the
            path it should be written to. Every '.txt' member is written to
            ``file_map['error.txt']``. Members with no (or an empty/None)
            entry are skipped, as are other '.xls' files. The workbook
            'SnvGet Feature Description.xls' is exported sheet by sheet to
            CSV files in the current working directory.

        The temporary download directory is removed afterwards, whether or
        not the extraction succeeded.
        """
        try:
            with zipfile.ZipFile(self.path) as archive:
                for member in archive.namelist():
                    filename = ntpath.basename(member)
                    if not filename:
                        # Directory entry: nothing to copy.
                        continue
                    if filename == 'SnvGet Feature Description.xls':
                        self._export_workbook_sheets(archive.read(member))
                        continue
                    extension = ntpath.splitext(filename)[-1]
                    if extension == '.txt':
                        target_path = file_map.get('error.txt')
                    elif extension != '.xls':
                        target_path = file_map.get(filename)
                    else:
                        target_path = None
                    if not target_path:
                        # Unknown member or an optional output that was not
                        # requested on the command line.
                        continue
                    source_file = archive.open(member)
                    try:
                        with open(target_path, 'wb') as target_file:
                            shutil.copyfileobj(source_file, target_file)
                    finally:
                        source_file.close()
        finally:
            # The archive handle is closed before its directory is removed.
            if self.tmp_dir:
                shutil.rmtree(self.tmp_dir)

    @staticmethod
    def _export_workbook_sheets(workbook_bytes):
        """Write each sheet of an .xls workbook to '<sheet name>.csv'.

        :param workbook_bytes: raw content of the workbook file.

        NOTE(review): the original code called ``name.replace(' ')``, which
        always raises TypeError; replacing spaces with underscores is an
        assumption based on the underscore-separated names used elsewhere
        in this file -- confirm the intended file names.
        """
        workbook = xlrd.open_workbook(file_contents=workbook_bytes)
        for sheet in workbook.sheets():
            csv_name = sheet.name.replace(' ', '_').strip() + '.csv'
            # 'wb' is the csv-module convention for Python 2, which this
            # file targets.
            with open(csv_name, 'wb') as csv_file:
                writer = csv.writer(csv_file)
                for row_index in range(sheet.nrows):
                    writer.writerow(sheet.row_values(row_index))
205
206
def main(params):
    """Parse the command line, submit the job and collect its results.

    :param params: list of command-line argument strings (without the
        program name).

    Exits through ``parser.error`` when neither inline variants nor an input
    file were given, and through ``stop_err`` when the result archive never
    becomes available or cannot be downloaded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        type=str, dest='mutationbox',
                        help='Input variants')
    parser.add_argument('--path', type=str,
                        dest='input_file_location',
                        help='Input file location')
    parser.add_argument('--hg18', dest='hg18',
                        action='store_true')
    parser.add_argument('--analysis_type', dest='analysis_type',
                        type=str,
                        choices=['driver', 'functional',
                                 'geneannotationonly'],
                        default='driver')
    # nargs='*' yields a list, so the default is a list as well.
    parser.add_argument('--chosendb', dest='chosendb',
                        type=str, nargs='*',
                        choices=['CHASM', 'SnvGet'],
                        default=['CHASM'])
    parser.add_argument('--cancertype', dest='cancer_type',
                        type=str, choices=CANCERTYPES,
                        required=True)
    parser.add_argument('--email', dest='email',
                        required=True, type=str)
    parser.add_argument('--annotate', dest='annotate',
                        action='store_true', default=None)
    parser.add_argument('--tsv_report', dest='tsv_report',
                        action='store_true', default=None)
    parser.add_argument('--mupit_out', dest='mupit_out',
                        action='store_true', default=None)
    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
                        type=str, required=True)
    parser.add_argument('--variant_analysis_out',
                        dest='variant_analysis_out',
                        type=str, required=True)
    parser.add_argument('--amino_acid_level_analysis_out',
                        dest='amino_acid_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--error_file', dest='error_file_out',
                        type=str, required=True)
    parser.add_argument('--snv_box_out', dest='snv_box_out',
                        type=str, required=False)
    parser.add_argument('--snv_features', dest='snv_features_out',
                        type=str, required=False)
    args = parser.parse_args(params)

    # Without inline variants the client uploads the input file, so at least
    # one of the two sources has to be present.
    if not args.mutationbox and not args.input_file_location:
        parser.error('one of -i/--input or --path is required')

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Archive member name -> output path requested on the command line.
    file_map = {
        'Amino_Acid_Level_Analysis.Result.tsv':
            args.amino_acid_level_analysis_out,
        'SNVBox.tsv': args.snv_box_out,
        'Variant_Analysis.Result.tsv': args.variant_analysis_out,
        'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
        'SnvGet Feature Description.xls': args.snv_features_out,
        'error.txt': args.error_file_out,
    }
    try:
        url = chasm_web.zip_exists(job_id)
    except requests.exceptions.HTTPError:
        # All retries answered 404: report it instead of a raw traceback.
        stop_err('Unable to download from the server')
        return
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err('Unable to download from the server')


if __name__ == '__main__':
    main(sys.argv[1:])