Mercurial > repos > saketkc > chasm_web
comparison tools/chasm/chasm_web.py @ 1:8eaaa7f6b619
Move files to tools
| author | Saket Choudhary <saketkc@gmail.com> |
|---|---|
| date | Fri, 01 Nov 2013 02:07:53 +0530 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:aea1a2363a94 | 1:8eaaa7f6b619 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 """ | |
| 3 The MIT License (MIT) | |
| 4 | |
| 5 Copyright (c) 2013 Saket Choudhary, <saketkc@gmail.com> | |
| 6 | |
| 7 Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 8 of this software and associated documentation files (the "Software"), to deal | |
| 9 in the Software without restriction, including without limitation the rights | |
| 10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 11 copies of the Software, and to permit persons to whom the Software is | |
| 12 furnished to do so, subject to the following conditions: | |
| 13 | |
| 14 The above copyright notice and this permission notice shall be included in | |
| 15 all copies or substantial portions of the Software. | |
| 16 | |
| 17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 23 THE SOFTWARE. | |
| 24 | |
| 25 """ | |
| 26 import requests | |
| 27 import argparse | |
| 28 import sys | |
| 29 import time | |
| 30 from functools import wraps | |
| 31 import simplejson as json | |
| 32 import zipfile | |
| 33 import tempfile, ntpath, shutil | |
| 34 import xlrd | |
| 35 import csv | |
| 36 import os | |
# Lookup from a zero-based integer to a CSV file name (presumably a worksheet
# index -> output name; nothing in the visible code reads this mapping).
sheet_map = {
    0: "Variant_Analysis.csv",
    1: "Amino_Acid_Level_Analysis.csv",
    2: "Gene_Level_Analysis.csv",
}
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator; the wrapped function returns whatever the
        decorated function returns
    :raises: whatever the decorated function raises on its final attempt
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # The first ``tries - 1`` attempts swallow the checked exception
            # and wait; the last attempt happens after the loop.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    msg = "Retrying in %d seconds..." % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Parenthesised single argument prints identically
                        # on Python 2 and Python 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Final attempt: an exception raised here reaches the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
# Values accepted for the --cancertype command-line option. They are sent to
# the server verbatim, so the spellings must not be "corrected" here.
CANCERTYPES = [
    "Bladder",
    "Blood-Lymphocyte",
    "Blood-Myeloid",
    "Brain-Cerebellum",
    "Brain-Glioblastoma_Multiforme",
    "Brain-Lower_Grade_Glioma",
    "Breast",
    "Cervix",
    "Colon",
    "Head_and_Neck",
    "Kidney-Chromophobe",
    "Kidney-Clear_Cell",
    "Kidney-Papiallary_Cell",
    "Liver-Nonviral",
    "Liver-Viral",
    "Lung-Adenocarcinoma",
    "Lung-Squamous_Cell",
    "Melanoma",
    "Other",
    "Ovary",
    "Pancreas",
    "Prostate-Adenocarcinoma",
    "Rectum",
    "Skin",
    "Stomach",
    "Thyroid",
    "Uterus",
]
# Endpoint that job submissions are POSTed to.
URL = "http://www.cravat.us/ClassifierSelect1"
def stop_err(msg):
    """Write ``msg`` to stderr and terminate the process with a failure status.

    :param msg: message to report; formatted with ``%s`` and followed by a
        newline
    :raises SystemExit: always, with exit status 1 so that callers and shells
        can tell the run failed (a bare ``sys.exit()`` would report status 0)
    """
    # One-element tuple so that a tuple-valued ``msg`` is formatted as a
    # whole instead of being unpacked as format arguments.
    sys.stderr.write('%s\n' % (msg,))
    sys.exit(1)
| 85 | |
class CHASMWeb(object):
    """Client for submitting a job to the CRAVAT web service at ``URL`` and
    collecting the result archive it produces.

    Typical use: ``make_request()`` -> ``zip_exists(job_id)`` ->
    ``download_zip(url, job_id)`` -> ``move_files(file_map)``.
    """

    def __init__(self, mutationbox=None, filepath=None, is_hg_18=None,
                 analysis_type=None, analysis_program=None, chosendb=None,
                 cancer_type=None, email=None, annotate_genes=None,
                 text_reports=None, mupit_out=None):
        """Store the submission options.

        :param mutationbox: variants to submit inline (form field
            ``mutationbox``); when empty, ``filepath`` is uploaded instead
        :param filepath: path of a file to upload as ``inputfile``
        :param is_hg_18: sent as form field ``hg18``
        :param analysis_type: sent as form field ``analysistype``
        :param analysis_program: legacy source for form field ``chosendb``;
            takes precedence over ``chosendb`` when both are given
        :param chosendb: sent as form field ``chosendb``
        :param cancer_type: sent as form field ``cancertype``
        :param email: sent as form field ``emailbox``
        :param annotate_genes: sent as form field ``geneannotcheckbox``
        :param text_reports: sent as form field ``tsvreport``
        :param mupit_out: sent as form field ``mupitinput``
        """
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Set by download_zip(); consumed and cleaned up by move_files().
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        """POST the job to ``URL`` and return the job id from the response.

        ``True`` option values are sent as ``"on"``; ``None`` and ``False``
        values are omitted from the form.

        :returns: the ``jobId`` value of the JSON response
        :raises ValueError: if neither ``mutationbox`` nor ``filepath`` is set
        """
        # Previously only ``analysis_program`` was sent as "chosendb", so a
        # value passed as ``chosendb`` was silently dropped. Keep the old
        # precedence for callers that relied on it and fall back to
        # ``chosendb`` otherwise.
        if self.analysis_program is not None:
            chosen_db = self.analysis_program
        else:
            chosen_db = self.chosendb
        data = {
            "mutationbox": self.mutationbox,
            "hg18": self.is_hg_18,
            "analysistype": self.analysis_type,
            "chosendb": chosen_db,
            "cancertype": self.cancer_type,
            "geneannotcheckbox": self.annotate_genes,
            "emailbox": self.email,
            "tsvreport": self.text_reports,
            "mupitinput": self.mupit_input,
        }
        stripped_data = {}
        for key, value in data.items():
            if value is True:
                value = "on"
            if value is not None and value is not False:
                stripped_data[key] = value

        if not self.mutationbox:
            if not self.filepath:
                raise ValueError("Either mutationbox or filepath must be provided")
            # Binary mode, and closed as soon as the upload finishes.
            with open(self.filepath, "rb") as input_handle:
                request = requests.post(URL, data=stripped_data,
                                        files={"inputfile": input_handle})
        else:
            # NOTE(review): the dummy file part presumably exists only to make
            # requests encode the form as multipart/form-data - confirm.
            request = requests.post(URL, data=stripped_data, files=dict(foo='bar'))
        job_id = json.loads(request.text)["jobId"]
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        """Return the result archive URL for ``job_id`` once it is available.

        A 404 response raises ``HTTPError``, which the ``retry`` decorator
        turns into a wait-and-retry; the error propagates after the last try.

        :param job_id: job identifier returned by ``make_request``
        :returns: URL of the result zip
        :raises requests.exceptions.HTTPError: if the archive is still
            missing on the final attempt
        """
        url = "http://www.cravat.us/results/%s/%s.zip" % (job_id, job_id)
        zip_download_request = requests.request("GET", url)
        if zip_download_request.status_code == 404:
            raise requests.exceptions.HTTPError()
        return url

    def download_zip(self, url, job_id):
        """Download ``url`` into a fresh temporary directory.

        :param url: location of the result archive
        :param job_id: used to name the local file ``<job_id>.zip``
        :returns: path of the downloaded file, or ``None`` when the server
            did not answer with status 200
        """
        self.tmp_dir = tempfile.mkdtemp()
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            self.path = os.path.join(self.tmp_dir, job_id + ".zip")
            with open(self.path, 'wb') as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
        else:
            self.path = None
            # Nothing was stored, so do not leave the empty directory behind.
            shutil.rmtree(self.tmp_dir, ignore_errors=True)
            self.tmp_dir = None
        return self.path

    @staticmethod
    def _open_csv(path):
        """Open ``path`` for ``csv.writer`` in the mode this Python expects."""
        if sys.version_info[0] < 3:
            return open(path, "wb")
        return open(path, "w", newline="")

    def _export_sheets_as_csv(self, workbook_bytes):
        """Write every sheet of an in-memory .xls workbook to ``<sheet>.csv``
        in the current working directory."""
        wb = xlrd.open_workbook(file_contents=workbook_bytes)
        for sheet_name in wb.sheet_names():
            sh = wb.sheet_by_name(sheet_name)
            # NOTE(review): the original call was ``name.replace(" ")``, which
            # cannot run; replacing spaces with "_" is assumed from the
            # underscore style of the other output names - confirm.
            name_shortened = sheet_name.replace(" ", "_").strip() + ".csv"
            with self._open_csv(name_shortened) as f:
                c = csv.writer(f)
                for r in range(sh.nrows):
                    c.writerow(sh.row_values(r))

    def move_files(self, file_map):
        """Copy the members of the downloaded archive to their destinations.

        * any ``.txt`` member is written to ``file_map["error.txt"]``;
        * ``SnvGet Feature Description.xls`` is converted sheet by sheet to
          CSV files in the current working directory;
        * other ``.xls`` members are ignored;
        * every other member is written to ``file_map[<member basename>]``.

        Members with no destination (missing key or ``None`` value) are
        skipped. The temporary download directory is always removed.

        :param file_map: mapping of archive member basename -> output path
        :raises ValueError: if no archive has been downloaded
        """
        if not self.path:
            raise ValueError("No downloaded archive to extract")
        try:
            # The archive is closed before the directory holding it is
            # removed in ``finally``.
            with zipfile.ZipFile(self.path) as zip_files:
                for name in zip_files.namelist():
                    filename = ntpath.basename(name)
                    if not filename:
                        # Directory entry: nothing to copy.
                        continue
                    extension = ntpath.splitext(filename)[-1]
                    if filename == "SnvGet Feature Description.xls":
                        self._export_sheets_as_csv(zip_files.read(name))
                        continue
                    if extension == ".txt":
                        destination = file_map.get("error.txt")
                    elif extension != ".xls":
                        destination = file_map.get(filename)
                    else:
                        destination = None
                    if not destination:
                        continue
                    with zip_files.open(name) as source_file:
                        with open(destination, "wb") as target_file:
                            shutil.copyfileobj(source_file, target_file)
        finally:
            if self.tmp_dir:
                shutil.rmtree(self.tmp_dir, ignore_errors=True)
                self.tmp_dir = None
def main(params):
    """Submit a job described by command-line ``params`` and store its results.

    Parses the options, submits the job through ``CHASMWeb``, waits for the
    result archive, downloads it and copies its members to the output paths
    given on the command line.

    :param params: list of command-line arguments (without the program name)
    :raises SystemExit: on invalid arguments (via argparse) or when the
        archive cannot be downloaded (via ``stop_err``)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str, dest="mutationbox", help="Input variants")
    parser.add_argument("--path", type=str, dest="input_file_location", help="Input file location")
    parser.add_argument("--hg18", dest="hg18", action="store_true")
    parser.add_argument("--analysis_type", dest="analysis_type", type=str, choices=["driver", "functional", "geneannotationonly"], default="driver")
    parser.add_argument("--chosendb", dest="chosendb", type=str, nargs="*", choices=["CHASM", "SnvGet"], default="CHASM")
    parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True)
    parser.add_argument("--email", dest="email", required=True, type=str)
    parser.add_argument("--annotate", dest="annotate", action="store_true", default=None)
    parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None)
    parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None)
    parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True)
    parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True)
    parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True)
    parser.add_argument("--error_file", dest="error_file_out", type=str, required=True)
    parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False)
    parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False)
    args = parser.parse_args(params)

    # Without inline variants or an input file there is nothing to submit;
    # fail here with a usage message instead of crashing later in open(None).
    if not args.mutationbox and not args.input_file_location:
        parser.error("one of -i/--input or --path is required")

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Archive member name -> output path requested on the command line.
    file_map = {"Amino_Acid_Level_Analysis.tsv": args.amino_acid_level_analysis_out,
                "SNVBox.tsv": args.snv_box_out,
                "Variant_Analysis.tsv": args.variant_analysis_out,
                "Gene_Level_Analysis.tsv": args.gene_analysis_out,
                "SnvGet Feature Description.xls": args.snv_features_out,
                "error.txt": args.error_file_out
                }
    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err("Unable to download from the server")
| 220 | |
# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
