5
|
1 #!/usr/bin/python
|
|
2 """
|
|
3 The MIT License (MIT)
|
|
4
|
|
5 Copyright (c) 2014 Saket Choudhary, <saketkc@gmail.com>
|
|
6
|
|
7 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8 of this software and associated documentation files (the 'Software'), to deal
|
|
9 in the Software without restriction, including without limitation the rights
|
|
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11 copies of the Software, and to permit persons to whom the Software is
|
|
12 furnished to do so, subject to the following conditions:
|
|
13
|
|
14 The above copyright notice and this permission notice shall be included in
|
|
15 all copies or substantial portions of the Software.
|
|
16
|
|
17 THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23 THE SOFTWARE.
|
|
24
|
|
25 """
|
|
26 import sys
|
|
27 import requests
|
|
28 import argparse
|
|
29 import time
|
|
30 from functools import wraps
|
6
|
31 import json
|
5
|
32 import zipfile
|
|
33 import tempfile
|
|
34 import ntpath
|
|
35 import shutil
|
|
36 import xlrd
|
|
37 import csv
|
|
38 import os
|
|
# CSV file names keyed by integer position 0..2.
# NOTE(review): presumably the worksheet index of a results workbook; this
# mapping is not referenced anywhere in the visible code -- confirm before
# relying on it.
sheet_map = dict(enumerate((
    'Variant_Analysis.csv',
    'Amino_Acid_Level_Analysis.csv',
    'Gene_Level_Analysis.csv',
)))
def retry(ExceptionToCheck, tries=40000, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The wrapped function is called up to ``tries`` times in total.  Every
    attempt except the last one swallows ``ExceptionToCheck``, reports the
    upcoming wait, sleeps, and multiplies the wait by ``backoff``.  The last
    attempt is made unguarded, so its exception (if any) reaches the caller.

    Note that the wait grows geometrically (``delay * backoff ** n``); with
    the default ``backoff=2`` a long run of failures quickly leads to very
    long sleeps.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator that adds the retry behaviour to a function
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            remaining, wait = tries, delay
            while remaining > 1:
                try:
                    return f(*args, **kwargs)
                # The bare class form (no "as" binding) parses on both
                # Python 2 and Python 3; the exception object is not needed.
                except ExceptionToCheck:
                    msg = 'Retrying in %d seconds...' % (wait)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call form prints the same text
                        # under the Python 2 print statement and Python 3.
                        print(msg)
                    time.sleep(wait)
                    remaining -= 1
                    wait *= backoff
            # Final attempt: let any exception propagate to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
85 CANCERTYPES = ['Bladder', 'Blood-Lymphocyte', 'Blood-Myeloid',
|
|
86 'Brain-Cerebellum', 'Brain-Glioblastoma_Multiforme',
|
|
87 'Brain-Lower_Grade_Glioma', 'Breast', 'Cervix',
|
|
88 'Colon', 'Head_and_Neck', 'Kidney-Chromophobe',
|
|
89 'Kidney-Clear_Cell', 'Kidney-Papiallary_Cell',
|
|
90 'Liver-Nonviral', 'Liver-Viral', 'Lung-Adenocarcinoma',
|
|
91 'Lung-Squamous_Cell', 'Melanoma', 'Other', 'Ovary',
|
|
92 'Pancreas', 'Prostate-Adenocarcinoma', 'Rectum',
|
|
93 'Skin', 'Stomach', 'Thyroid', 'Uterus']
|
|
94
|
|
95 __URL__ = 'http://www.cravat.us/rest/service/submit'
|
|
96
|
|
97
|
|
def stop_err(msg):
    """Write *msg* to standard error and terminate the process as a failure.

    :param msg: text to report; a trailing newline is appended
    :raises SystemExit: always, with exit status 1 so that callers of the
        script can tell the run failed
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit(1)
class CHASMWeb(object):
    """Client for one job on the web service at www.cravat.us.

    Typical use, as done by the command-line entry point of this script:
    ``make_request`` submits the job, ``zip_exists`` waits for the result
    archive, ``download_zip`` fetches it into a temporary directory and
    ``move_files`` copies its members to their destinations.
    """

    def __init__(self,
                 mutationbox=None, filepath=None,
                 is_hg_18=None, analysis_type=None,
                 analysis_program=None, chosendb=None,
                 cancer_type=None, email=None,
                 annotate_genes=None, text_reports=None,
                 mupit_out=None):
        """Store the submission options.

        Options left as ``None`` (or ``False``) are not sent to the server.

        :param mutationbox: variants given inline as text; when empty the
            file at ``filepath`` is uploaded instead
        :param filepath: path of the input file to upload
        :param is_hg_18: sent as the 'hg18' form field
        :param analysis_type: sent as the 'analysistype' form field
        :param analysis_program: sent as the 'analysisitem' form field
        :param chosendb: stored only; not used when building the request
        :param cancer_type: sent as the 'chasmclassifier' form field
        :param email: sent as the 'email' form field
        :param annotate_genes: sent as the 'geneannotation' form field
        :param text_reports: stored only; 'tsvreport' is always sent as 'on'
        :param mupit_out: sent as the 'mupitinput' form field
        """
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Both are filled in by download_zip().
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        """Submit the job and return the job id reported by the server.

        :returns: the value of 'jobid' in the JSON response body
        :raises KeyError: if the response contains no 'jobid'
        """
        data = {
            # NOTE(review): this key ends with a space. It is kept exactly as
            # found because it is sent to the server; confirm the field name
            # the service expects.
            'mutations ': self.mutationbox,
            'hg18': self.is_hg_18,
            'analysistype': self.analysis_type,
            'analysisitem': self.analysis_program,
            'chasmclassifier': self.cancer_type,
            'geneannotation': self.annotate_genes,
            'email': self.email,
            'tsvreport': 'on',  # self.text_reports,
            'mupitinput': self.mupit_input,
        }

        # Drop unset/disabled options and encode enabled flags as 'on'.
        stripped_data = {}
        for key, value in data.items():
            if value is None or value is False:
                continue
            stripped_data[key] = 'on' if value is True else value

        if not self.mutationbox:
            # Binary mode, and the handle is closed once the upload is done.
            with open(self.filepath, 'rb') as input_file:
                request = requests.post(
                    __URL__, data=stripped_data,
                    files={'inputfile': input_file})
        else:
            # NOTE(review): the dummy 'foo' part presumably forces a
            # multipart/form-data body even without a file -- confirm.
            request = requests.post(
                __URL__, data=stripped_data, files=dict(foo='bar'))
        print(request.text)
        job_id = json.loads(request.text)['jobid']
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        """Return the URL of the result archive once it no longer gives 404.

        A 404 response raises ``requests.HTTPError``, which the ``retry``
        decorator turns into another attempt after a pause.  Any other
        status code is treated as "archive available".

        :param job_id: job id as returned by ``make_request``
        :returns: URL of the result zip
        """
        print(job_id)
        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
        # stream=True: only the status line/headers are needed here, so the
        # archive body is not downloaded just to probe for existence.
        zip_download_request = requests.request('GET', url, stream=True)
        try:
            status_code = zip_download_request.status_code
        finally:
            zip_download_request.close()
        if status_code == 404:
            raise requests.HTTPError()
        return url

    def download_zip(self, url, job_id):
        """Download the result archive into a fresh temporary directory.

        On success ``self.tmp_dir`` and ``self.path`` describe the download;
        on any non-200 response both are ``None`` and nothing is left on
        disk.

        :param url: URL of the result zip
        :param job_id: used to name the local file ``<job_id>.zip``
        :returns: local path of the archive, or ``None`` on failure
        """
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            self.tmp_dir = None
            self.path = None
            return self.path

        # Create the directory only when there is something to store in it.
        self.tmp_dir = tempfile.mkdtemp()
        self.path = os.path.join(self.tmp_dir, job_id + '.zip')
        with open(self.path, 'wb') as f:
            for chunk in r.iter_content(64 * 1024):
                f.write(chunk)
        return self.path

    def move_files(self, file_map):
        """Copy the members of the downloaded archive to their destinations.

        * every ``.txt`` member is written to ``file_map['error.txt']``;
        * 'SnvGet Feature Description.xls' is exported sheet by sheet as CSV
          files in the current working directory;
        * other ``.xls`` members are ignored;
        * any other member is written to ``file_map[<member base name>]``.

        Members whose destination is missing from ``file_map`` or is empty
        are skipped.  The temporary download directory is removed afterwards,
        even if copying fails.

        :param file_map: maps member base names to output paths
        :type file_map: dict
        """
        try:
            with zipfile.ZipFile(self.path) as archive:
                for member in archive.namelist():
                    filename = ntpath.basename(member)
                    extension = ntpath.splitext(filename)[-1]

                    if filename == 'SnvGet Feature Description.xls':
                        self._export_sheets_as_csv(archive.read(member))
                        continue

                    if extension == '.txt':
                        target_path = file_map.get('error.txt')
                    elif extension != '.xls':
                        target_path = file_map.get(filename)
                    else:
                        target_path = None
                    if not target_path:
                        continue

                    with archive.open(member) as source_file:
                        with open(target_path, 'wb') as target_file:
                            shutil.copyfileobj(source_file, target_file)
        finally:
            # The archive is closed by now, so the directory can go.
            if self.tmp_dir:
                shutil.rmtree(self.tmp_dir)

    @staticmethod
    def _export_sheets_as_csv(workbook_bytes):
        """Write each sheet of an XLS workbook to '<sheet name>.csv'.

        Files are created in the current working directory.

        :param workbook_bytes: raw content of the .xls file
        """
        with xlrd.open_workbook(file_contents=workbook_bytes) as workbook:
            for sheet_name in workbook.sheet_names():
                sheet = workbook.sheet_by_name(sheet_name)
                # NOTE(review): spaces are assumed to become underscores,
                # matching the '*_Analysis.csv' names used elsewhere in this
                # file -- confirm the intended replacement.
                csv_name = sheet_name.strip().replace(' ', '_') + '.csv'
                # The csv module wants a binary file on Python 2 and a text
                # file opened with newline='' on Python 3.
                if sys.version_info[0] < 3:
                    csv_file = open(csv_name, 'wb')
                else:
                    csv_file = open(csv_name, 'w', newline='')
                with csv_file:
                    writer = csv.writer(csv_file)
                    for row_index in range(sheet.nrows):
                        writer.writerow(sheet.row_values(row_index))
def main(params):
    """Parse the command line, run one job and store its result files.

    Submits the job, waits for the result archive, downloads it and copies
    its members to the output paths given on the command line.  If the
    archive cannot be downloaded the script stops through ``stop_err``.

    :param params: command-line arguments without the program name
    :type params: list of str
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        type=str, dest='mutationbox',
                        help='Input variants')
    parser.add_argument('--path', type=str,
                        dest='input_file_location',
                        help='Input file location')
    parser.add_argument('--hg18', dest='hg18',
                        action='store_true')
    parser.add_argument('--analysis_type', dest='analysis_type',
                        type=str,
                        choices=['driver', 'functional',
                                 'geneannotationonly'],
                        default='driver')
    # nargs='*' yields a list, so the default is a list as well; this keeps
    # args.chosendb the same type whether or not the option is given.
    parser.add_argument('--chosendb', dest='chosendb',
                        type=str, nargs='*',
                        choices=['CHASM', 'SnvGet'],
                        default=['CHASM'])
    parser.add_argument('--cancertype', dest='cancer_type',
                        type=str, choices=CANCERTYPES,
                        required=True)
    parser.add_argument('--email', dest='email',
                        required=True, type=str)
    parser.add_argument('--annotate', dest='annotate',
                        action='store_true', default=None)
    parser.add_argument('--tsv_report', dest='tsv_report',
                        action='store_true', default=None)
    parser.add_argument('--mupit_out', dest='mupit_out',
                        action='store_true', default=None)
    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
                        type=str, required=True)
    parser.add_argument('--variant_analysis_out',
                        dest='variant_analysis_out',
                        type=str, required=True)
    parser.add_argument('--amino_acid_level_analysis_out',
                        dest='amino_acid_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--codon_level_analysis_out',
                        dest='codon_level_analysis_out',
                        type=str, required=True,)
    parser.add_argument('--error_file', dest='error_file_out',
                        type=str, required=True)
    parser.add_argument('--snv_box_out', dest='snv_box_out',
                        type=str, required=False)
    parser.add_argument('--snv_features', dest='snv_features_out',
                        type=str, required=False)
    args = parser.parse_args(params)

    # Without inline variants the input file is uploaded, so at least one of
    # the two must be present; fail with a usage message instead of crashing
    # later while trying to open a missing path.
    if not args.mutationbox and not args.input_file_location:
        parser.error('one of -i/--input or --path is required')

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()

    # Archive member base name -> destination path given by the user.
    file_map = {
        'Amino_Acid_Level_Analysis.Result.tsv':
            args.amino_acid_level_analysis_out,
        'SNVBox.tsv': args.snv_box_out,
        'Variant_Analysis.Result.tsv': args.variant_analysis_out,
        'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
        'SnvGet Feature Description.xls': args.snv_features_out,
        'error.txt': args.error_file_out,
        'Codon_Level_Analysis.Result.tsv': args.codon_level_analysis_out,
    }

    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err('Unable to download from the server')
if __name__ == '__main__':
    # Run as a script: hand main() the arguments that follow the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)