Mercurial > repos > saketkc > chasm_web
comparison chasm_webservice/chasm_webservice.py @ 5:b9de63c72559 draft
Uploaded
| author | saketkc |
|---|---|
| date | Mon, 14 Apr 2014 19:16:11 -0400 |
| parents | |
| children | d88b17f16b14 |
comparison
equal
deleted
inserted
replaced
| 4:8aceb7fc57f5 | 5:b9de63c72559 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 """ | |
| 3 The MIT License (MIT) | |
| 4 | |
| 5 Copyright (c) 2014 Saket Choudhary, <saketkc@gmail.com> | |
| 6 | |
| 7 Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 8 of this software and associated documentation files (the 'Software'), to deal | |
| 9 in the Software without restriction, including without limitation the rights | |
| 10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 11 copies of the Software, and to permit persons to whom the Software is | |
| 12 furnished to do so, subject to the following conditions: | |
| 13 | |
| 14 The above copyright notice and this permission notice shall be included in | |
| 15 all copies or substantial portions of the Software. | |
| 16 | |
| 17 THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 23 THE SOFTWARE. | |
| 24 | |
| 25 """ | |
| 26 import sys | |
| 27 import requests | |
| 28 import argparse | |
| 29 import time | |
| 30 from functools import wraps | |
| 31 import simplejson as json | |
| 32 import zipfile | |
| 33 import tempfile | |
| 34 import ntpath | |
| 35 import shutil | |
| 36 import xlrd | |
| 37 import csv | |
| 38 import os | |
# Index -> CSV file name lookup.  Not referenced elsewhere in the visible part
# of this file; presumably worksheet index -> output name (TODO confirm).
sheet_map = dict(enumerate([
    'Variant_Analysis.csv',
    'Amino_Acid_Level_Analysis.csv',
    'Gene_Level_Analysis.csv',
]))
| 41 | |
| 42 | |
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    '''Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The wrapped function is called up to ``tries`` times in total (always at
    least once).  Only the first ``tries - 1`` failures are swallowed; the
    final call is made outside the ``try`` so its exception reaches the
    caller.  Exceptions that do not match ``ExceptionToCheck`` propagate
    immediately without any retry.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator that wraps a callable with the retry loop
    '''
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    msg = 'Retrying in %d seconds...' % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Parenthesised single-argument form prints the same
                        # text on both Python 2 and Python 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: let any exception propagate to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
# Values accepted by the --cancertype command line option; the chosen value
# is sent to the service as the 'chasmclassifier' form field.
# NOTE(review): 'Kidney-Papiallary_Cell' looks misspelled, but it is a
# runtime string sent to the server -- confirm the server-side name before
# changing it.
CANCERTYPES = [
    'Bladder',
    'Blood-Lymphocyte',
    'Blood-Myeloid',
    'Brain-Cerebellum',
    'Brain-Glioblastoma_Multiforme',
    'Brain-Lower_Grade_Glioma',
    'Breast',
    'Cervix',
    'Colon',
    'Head_and_Neck',
    'Kidney-Chromophobe',
    'Kidney-Clear_Cell',
    'Kidney-Papiallary_Cell',
    'Liver-Nonviral',
    'Liver-Viral',
    'Lung-Adenocarcinoma',
    'Lung-Squamous_Cell',
    'Melanoma',
    'Other',
    'Ovary',
    'Pancreas',
    'Prostate-Adenocarcinoma',
    'Rectum',
    'Skin',
    'Stomach',
    'Thyroid',
    'Uterus',
]
| 94 | |
# Endpoint that CHASMWeb.make_request POSTs new analysis jobs to.
__URL__ = 'http://www.cravat.us/rest/service/submit'
| 96 | |
| 97 | |
def stop_err(msg):
    '''Write ``msg`` to stderr and terminate the process with a failure status.

    :param msg: error message; a trailing newline is appended
    :raises SystemExit: always, with exit code 1 so that callers of the
        script can detect the failure (a bare ``sys.exit()`` would report
        success)
    '''
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
| 101 | |
| 102 | |
class CHASMWeb(object):
    '''Small client for the CRAVAT job-submission web service.

    Intended call order (as used by ``main``): ``make_request()`` submits a
    job, ``zip_exists()`` waits for the result archive, ``download_zip()``
    fetches it into a temporary directory and ``move_files()`` copies the
    archive members to their destinations and removes the temporary
    directory.
    '''

    def __init__(self,
                 mutationbox=None, filepath=None,
                 is_hg_18=None, analysis_type=None,
                 analysis_program=None, chosendb=None,
                 cancer_type=None, email=None,
                 annotate_genes=None, text_reports=None,
                 mupit_out=None):
        '''Store the submission options.

        :param mutationbox: variants passed inline as text
        :param filepath: path of a file with the variants; only used when
            ``mutationbox`` is empty
        :param is_hg_18: sent as the ``hg18`` form field
        :param analysis_type: sent as the ``analysistype`` form field
        :param analysis_program: sent as the ``analysisitem`` form field
        :param chosendb: stored only; no method of this class reads it
        :param cancer_type: sent as the ``chasmclassifier`` form field
        :param email: sent as the ``email`` form field
        :param annotate_genes: sent as the ``geneannotation`` form field
        :param text_reports: stored only; ``tsvreport`` is always sent as 'on'
        :param mupit_out: sent as the ``mupitinput`` form field
        '''
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Set by download_zip(); consumed by move_files().
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        '''Submit the job and return the job id reported by the server.

        ``True`` option values are sent as ``'on'``; ``None`` and ``False``
        values are left out of the form altogether.

        :returns: value of the ``jobid`` key of the JSON response
        :raises ValueError: if neither inline variants nor a file path was
            given
        '''
        data = {
            # NOTE(review): the trailing space in this key looks accidental
            # but is what has always been sent -- confirm against the
            # service before changing it.
            'mutations ': self.mutationbox,
            'hg18': self.is_hg_18,
            'analysistype': self.analysis_type,
            'analysisitem': self.analysis_program,
            'chasmclassifier': self.cancer_type,
            'geneannotation': self.annotate_genes,
            'email': self.email,
            'tsvreport': 'on',  # self.text_reports,
            'mupitinput': self.mupit_input,
        }
        stripped_data = {}

        for key, value in data.items():
            if value is True:
                value = 'on'
            if value is not None and value is not False:
                stripped_data[key] = value

        if self.mutationbox:
            # Dummy ``files`` entry kept from the original implementation;
            # presumably it forces a multipart/form-data body -- TODO confirm.
            request = requests.post(
                __URL__, data=stripped_data, files=dict(foo='bar'))
        else:
            if not self.filepath:
                raise ValueError(
                    'either mutationbox or filepath must be provided')
            # Close the upload handle as soon as the request has been sent.
            with open(self.filepath, 'rb') as input_handle:
                request = requests.post(
                    __URL__, data=stripped_data,
                    files={'inputfile': input_handle})
        job_id = json.loads(request.text)['jobid']
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        '''Return the result archive URL once the server stops answering 404.

        Wrapped in ``retry``: a 404 raises ``HTTPError``, which triggers
        another attempt after a delay; the error of the final attempt
        propagates to the caller.

        :param job_id: id returned by ``make_request``
        :returns: URL of the result zip
        :raises requests.exceptions.HTTPError: if the archive is still
            missing on the last attempt
        '''
        url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id)
        zip_download_request = requests.request('GET', url)
        if zip_download_request.status_code == 404:
            raise requests.exceptions.HTTPError()
        return url

    def download_zip(self, url, job_id):
        '''Download ``url`` to ``<tmp_dir>/<job_id>.zip``.

        :returns: path of the downloaded file, or ``None`` when the server
            did not answer with status 200 (no temporary directory is left
            behind in that case)
        '''
        self.tmp_dir = None
        self.path = None
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            return None
        # Create the directory only once there is something to store in it.
        self.tmp_dir = tempfile.mkdtemp()
        self.path = os.path.join(self.tmp_dir, job_id + '.zip')
        with open(self.path, 'wb') as f:
            for chunk in r.iter_content(8192):
                f.write(chunk)
        return self.path

    def move_files(self, file_map):
        '''Copy the members of the downloaded archive to their destinations.

        * every ``.txt`` member is written to ``file_map['error.txt']``
        * ``SnvGet Feature Description.xls`` is exported to one CSV file per
          worksheet in the current working directory
        * any other ``.xls`` member is ignored
        * remaining members are written to ``file_map[<base name>]``

        Members without a destination (name missing from ``file_map`` or
        mapped to ``None``, e.g. an optional output that was not requested)
        and directory entries are skipped.  The temporary download directory
        is removed afterwards, even if copying fails.

        :param file_map: dict mapping archive base names to output paths
        '''
        try:
            with zipfile.ZipFile(self.path) as archive:
                for member in archive.namelist():
                    filename = ntpath.basename(member)
                    if not filename:
                        continue  # directory entry
                    extension = ntpath.splitext(filename)[-1]
                    if extension == '.xls':
                        if filename == 'SnvGet Feature Description.xls':
                            self._export_workbook(archive.read(member))
                        continue
                    if extension == '.txt':
                        target_path = file_map.get('error.txt')
                    else:
                        target_path = file_map.get(filename)
                    if not target_path:
                        continue
                    source_file = archive.open(member)
                    with source_file, open(target_path, 'wb') as target_file:
                        shutil.copyfileobj(source_file, target_file)
        finally:
            shutil.rmtree(self.tmp_dir, ignore_errors=True)

    def _export_workbook(self, workbook_bytes):
        '''Write every worksheet of an in-memory .xls workbook to a CSV file.

        Files are created in the current working directory.
        '''
        # xlrd cannot read from a file object; hand it the raw bytes instead.
        wb = xlrd.open_workbook(file_contents=workbook_bytes)
        for sheet_name in wb.sheet_names():
            sh = wb.sheet_by_name(sheet_name)
            # NOTE(review): the original called ``name.replace(' ')``, which
            # raises TypeError; underscores are assumed to be the intended
            # replacement -- confirm the expected output file names.
            name_shortened = sheet_name.strip().replace(' ', '_') + '.csv'
            with self._open_csv(name_shortened) as f:
                c = csv.writer(f)
                for r in range(sh.nrows):
                    c.writerow(sh.row_values(r))

    @staticmethod
    def _open_csv(path):
        '''Open ``path`` for csv.writer in the mode the running Python needs.'''
        if sys.version_info[0] < 3:
            return open(path, 'wb')
        return open(path, 'w', newline='')
| 205 | |
| 206 | |
def main(params):
    '''Command line entry point: submit a job and collect its results.

    Parses ``params``, submits the job through ``CHASMWeb``, waits for the
    result archive, downloads it and copies its members to the output paths
    given on the command line.

    :param params: list of command line arguments (without the program name)
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        type=str, dest='mutationbox',
                        help='Input variants')
    parser.add_argument('--path', type=str,
                        dest='input_file_location',
                        help='Input file location')
    parser.add_argument('--hg18', dest='hg18',
                        action='store_true')
    parser.add_argument('--analysis_type', dest='analysis_type',
                        type=str,
                        choices=['driver', 'functional',
                                 'geneannotationonly'],
                        default='driver')
    # NOTE(review): the parsed value is handed to CHASMWeb as ``chosendb``,
    # which the request never sends, while ``analysis_program`` is never set
    # -- confirm whether this option was meant to feed 'analysisitem'.
    parser.add_argument('--chosendb', dest='chosendb',
                        type=str, nargs='*',
                        choices=['CHASM', 'SnvGet'],
                        default='CHASM')
    parser.add_argument('--cancertype', dest='cancer_type',
                        type=str, choices=CANCERTYPES,
                        required=True)
    parser.add_argument('--email', dest='email',
                        required=True, type=str)
    parser.add_argument('--annotate', dest='annotate',
                        action='store_true', default=None)
    parser.add_argument('--tsv_report', dest='tsv_report',
                        action='store_true', default=None)
    parser.add_argument('--mupit_out', dest='mupit_out',
                        action='store_true', default=None)
    parser.add_argument('--gene_analysis_out', dest='gene_analysis_out',
                        type=str, required=True)
    parser.add_argument('--variant_analysis_out',
                        dest='variant_analysis_out',
                        type=str, required=True)
    parser.add_argument('--amino_acid_level_analysis_out',
                        dest='amino_acid_level_analysis_out',
                        type=str, required=True)
    parser.add_argument('--error_file', dest='error_file_out',
                        type=str, required=True)
    parser.add_argument('--snv_box_out', dest='snv_box_out',
                        type=str, required=False)
    parser.add_argument('--snv_features', dest='snv_features_out',
                        type=str, required=False)
    args = parser.parse_args(params)

    # Without either source of variants the submission cannot be built;
    # fail here with a usage message instead of deep inside the request.
    if not args.mutationbox and not args.input_file_location:
        parser.error('one of -i/--input or --path is required')

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Archive member name -> destination path chosen on the command line.
    file_map = {
        'Amino_Acid_Level_Analysis.Result.tsv':
            args.amino_acid_level_analysis_out,
        'SNVBox.tsv': args.snv_box_out,
        'Variant_Analysis.Result.tsv': args.variant_analysis_out,
        'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out,
        'SnvGet Feature Description.xls': args.snv_features_out,
        'error.txt': args.error_file_out,
    }
    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err('Unable to download from the server')


if __name__ == '__main__':
    main(sys.argv[1:])
