1
|
1 #!/usr/bin/python
|
|
2 """
|
|
3 The MIT License (MIT)
|
|
4
|
|
5 Copyright (c) 2013 Saket Choudhary, <saketkc@gmail.com>
|
|
6
|
|
7 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8 of this software and associated documentation files (the "Software"), to deal
|
|
9 in the Software without restriction, including without limitation the rights
|
|
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11 copies of the Software, and to permit persons to whom the Software is
|
|
12 furnished to do so, subject to the following conditions:
|
|
13
|
|
14 The above copyright notice and this permission notice shall be included in
|
|
15 all copies or substantial portions of the Software.
|
|
16
|
|
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23 THE SOFTWARE.
|
|
24
|
|
25 """
|
|
26 import requests
|
|
27 import argparse
|
|
28 import sys
|
|
29 import time
|
|
30 from functools import wraps
|
|
31 import simplejson as json
|
|
32 import zipfile
|
|
33 import tempfile, ntpath, shutil
|
|
34 import xlrd
|
|
35 import csv
|
|
36 import os
|
|
# Maps an integer index to a CSV file name.
# NOTE(review): nothing in the visible code reads this mapping; presumably the
# index is a worksheet position in a results workbook -- confirm before use.
sheet_map = {
    0: "Variant_Analysis.csv",
    1: "Amino_Acid_Level_Analysis.csv",
    2: "Gene_Level_Analysis.csv",
}
|
|
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The decorated function is called up to ``tries`` times in total. After
    each failed attempt except the last, a "Retrying in N seconds..." message
    is emitted and the wrapper sleeps; the delay is multiplied by ``backoff``
    after every sleep. The final attempt is made outside the ``try`` so its
    exception (if any) propagates to the caller unchanged.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator that wraps a callable with the retry loop
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                # "except X as e" / print() are valid on both Python 2.6+
                # and Python 3; the exception object itself is not needed.
                except ExceptionToCheck:
                    msg = "Retrying in %d seconds..." % (mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: not guarded, so a failure reaches the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
|
|
# Values accepted by the --cancertype command-line option and sent as the
# "cancertype" form field. The spellings are kept exactly as they were.
CANCERTYPES = [
    "Bladder",
    "Blood-Lymphocyte",
    "Blood-Myeloid",
    "Brain-Cerebellum",
    "Brain-Glioblastoma_Multiforme",
    "Brain-Lower_Grade_Glioma",
    "Breast",
    "Cervix",
    "Colon",
    "Head_and_Neck",
    "Kidney-Chromophobe",
    "Kidney-Clear_Cell",
    "Kidney-Papiallary_Cell",
    "Liver-Nonviral",
    "Liver-Viral",
    "Lung-Adenocarcinoma",
    "Lung-Squamous_Cell",
    "Melanoma",
    "Other",
    "Ovary",
    "Pancreas",
    "Prostate-Adenocarcinoma",
    "Rectum",
    "Skin",
    "Stomach",
    "Thyroid",
    "Uterus",
]

# Endpoint that job submissions are POSTed to.
URL = "http://www.cravat.us/ClassifierSelect1"
|
|
def stop_err(msg):
    """Write *msg* (plus a newline) to stderr and terminate the process.

    Exits with status 1 so that callers of the script can detect the
    failure; a bare ``sys.exit()`` would report success (status 0).

    :param msg: text to report on standard error
    :raises SystemExit: always
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
|
|
85
|
|
class CHASMWeb:
    """Client for submitting a job to the CRAVAT web form and fetching results.

    Typical sequence (as used by ``main``): ``make_request()`` to submit,
    ``zip_exists()`` to wait for the result archive, ``download_zip()`` to
    fetch it into a temporary directory, then ``move_files()`` to copy the
    archive members to their destinations and remove the temporary directory.
    """

    def __init__(self, mutationbox=None, filepath=None, is_hg_18=None,
                 analysis_type=None, analysis_program=None, chosendb=None,
                 cancer_type=None, email=None, annotate_genes=None,
                 text_reports=None, mupit_out=None):
        """Store the submission options; nothing is sent until make_request().

        :param mutationbox: variants given inline; when empty, ``filepath``
            is uploaded instead
        :param filepath: path of the variants file to upload
        :param is_hg_18: sent as the "hg18" form field
        :param analysis_type: sent as the "analysistype" form field
        :param analysis_program: sent as the "chosendb" form field when set
        :param chosendb: sent as the "chosendb" form field when
            ``analysis_program`` is not set
        :param cancer_type: sent as the "cancertype" form field
        :param email: sent as the "emailbox" form field
        :param annotate_genes: sent as the "geneannotcheckbox" form field
        :param text_reports: sent as the "tsvreport" form field
        :param mupit_out: sent as the "mupitinput" form field
        """
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Populated by download_zip(); None until a download has succeeded.
        self.tmp_dir = None
        self.path = None

    def _form_data(self):
        """Return the form fields to POST, with unset options left out.

        ``None`` and ``False`` values are dropped and ``True`` becomes the
        string "on"; every other value is passed through unchanged.
        """
        # analysis_program keeps priority for callers that already pass it;
        # otherwise fall back to chosendb, which was previously stored but
        # never sent.
        chosen = self.analysis_program
        if chosen is None:
            chosen = self.chosendb
        fields = {
            "mutationbox": self.mutationbox,
            "hg18": self.is_hg_18,
            "analysistype": self.analysis_type,
            "chosendb": chosen,
            "cancertype": self.cancer_type,
            "geneannotcheckbox": self.annotate_genes,
            "emailbox": self.email,
            "tsvreport": self.text_reports,
            "mupitinput": self.mupit_input,
        }
        form = {}
        for key, value in fields.items():
            if value is None or value is False:
                continue
            form[key] = "on" if value is True else value
        return form

    def make_request(self):
        """Submit the job and return the "jobId" value of the JSON response.

        :raises ValueError: if neither ``mutationbox`` nor ``filepath`` is set
        """
        form = self._form_data()
        if not self.mutationbox:
            if not self.filepath:
                raise ValueError(
                    "either mutationbox or filepath must be provided")
            # The handle is closed as soon as the upload has been sent.
            with open(self.filepath, "rb") as input_file:
                response = requests.post(
                    URL, data=form, files={"inputfile": input_file})
        else:
            # A dummy file part is kept from the original code; passing
            # ``files`` makes requests send a multipart body.
            # NOTE(review): presumably the server requires multipart -- confirm.
            response = requests.post(URL, data=form, files=dict(foo='bar'))
        return json.loads(response.text)["jobId"]

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        """Return the result archive URL for *job_id* once it is not a 404.

        A 404 response raises ``HTTPError``, which the ``retry`` decorator
        turns into a wait-and-retry; the error propagates after the last try.
        """
        url = "http://www.cravat.us/results/%s/%s.zip" % (job_id, job_id)
        response = requests.request("GET", url)
        if response.status_code == 404:
            raise requests.HTTPError()
        return url

    def download_zip(self, url, job_id):
        """Download *url* to ``<tmp_dir>/<job_id>.zip``.

        :returns: the path of the saved archive, or ``None`` when the server
            did not answer with status 200 (no temporary directory is left
            behind in that case)
        """
        response = requests.get(url, stream=True)
        if response.status_code != 200:
            self.tmp_dir = None
            self.path = None
            return None
        # Create the directory only once there is something to store in it.
        self.tmp_dir = tempfile.mkdtemp()
        self.path = os.path.join(self.tmp_dir, job_id + ".zip")
        with open(self.path, 'wb') as f:
            for chunk in response.iter_content(8192):
                f.write(chunk)
        return self.path

    def move_files(self, file_map):
        """Copy the members of the downloaded archive to their destinations.

        :param file_map: maps an archive member's base name (and the key
            "error.txt" for any ``.txt`` member) to an output path; members
            with no entry, or an empty/None entry, are skipped

        The temporary download directory is removed afterwards, whether or
        not extraction succeeded.
        """
        try:
            with zipfile.ZipFile(self.path) as archive:
                for member in archive.namelist():
                    self._extract_member(archive, member, file_map)
        finally:
            # Best-effort cleanup, done after the archive has been closed.
            shutil.rmtree(self.tmp_dir, ignore_errors=True)

    def _extract_member(self, archive, member, file_map):
        """Route one archive member to its output, or skip it."""
        filename = ntpath.basename(member)
        extension = ntpath.splitext(filename)[-1]
        if filename == "SnvGet Feature Description.xls":
            self._write_sheets_as_csv(archive.read(member))
            return
        if extension == ".xls":
            # Other workbooks were never copied by this tool.
            return
        key = "error.txt" if extension == ".txt" else filename
        destination = file_map.get(key)
        if not destination:
            # Unknown member (e.g. a directory entry) or an optional output
            # the caller did not ask for.
            return
        with archive.open(member) as source_file:
            with open(destination, "wb") as target_file:
                shutil.copyfileobj(source_file, target_file)

    def _write_sheets_as_csv(self, workbook_bytes):
        """Write every sheet of an .xls workbook to ``<sheet name>.csv``.

        The files are created in the current working directory, as before;
        ``file_map`` is not consulted for them.
        """
        # xlrd accepts the workbook as bytes via file_contents; it does not
        # accept an open file object in place of a filename.
        with xlrd.open_workbook(file_contents=workbook_bytes) as workbook:
            for sheet_name in workbook.sheet_names():
                sheet = workbook.sheet_by_name(sheet_name)
                # The original replace(" ") lacked its second argument and
                # raised TypeError. Underscores match the other output names
                # used in this script.
                # NOTE(review): confirm "_" (rather than "") was intended.
                csv_name = sheet_name.strip().replace(" ", "_") + ".csv"
                with self._open_csv(csv_name) as csv_file:
                    writer = csv.writer(csv_file)
                    for row_index in range(sheet.nrows):
                        writer.writerow(sheet.row_values(row_index))

    @staticmethod
    def _open_csv(path):
        """Open *path* for csv.writer in the mode the running Python needs."""
        if sys.version_info[0] >= 3:
            return open(path, "w", newline="")
        return open(path, "wb")
|
|
def _build_arg_parser():
    """Return the argparse parser describing the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str, dest="mutationbox", help="Input variants")
    parser.add_argument("--path", type=str, dest="input_file_location", help="Input file location")
    parser.add_argument("--hg18", dest="hg18", action="store_true")
    parser.add_argument("--analysis_type", dest="analysis_type", type=str, choices=["driver", "functional", "geneannotationonly"], default="driver")
    parser.add_argument("--chosendb", dest="chosendb", type=str, nargs="*", choices=["CHASM", "SnvGet"], default="CHASM")
    parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True)
    parser.add_argument("--email", dest="email", required=True, type=str)
    parser.add_argument("--annotate", dest="annotate", action="store_true", default=None)
    parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None)
    parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None)
    parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True)
    parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True)
    parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True)
    parser.add_argument("--error_file", dest="error_file_out", type=str, required=True)
    parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False)
    parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False)
    return parser


def main(params):
    """Submit a job, wait for its result archive and copy out the results.

    :param params: command-line arguments without the program name
        (``sys.argv[1:]``)

    Exits through ``parser.error`` when neither inline variants nor an input
    file is given, and through ``stop_err`` when the archive cannot be
    downloaded.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(params)
    # Without either source there is nothing to submit; report it as a usage
    # error instead of failing later while opening a missing path.
    if not args.mutationbox and not args.input_file_location:
        parser.error("one of -i/--input or --path is required")

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Archive member name -> output path requested on the command line.
    file_map = {"Amino_Acid_Level_Analysis.tsv": args.amino_acid_level_analysis_out,
                "SNVBox.tsv": args.snv_box_out,
                "Variant_Analysis.tsv": args.variant_analysis_out,
                "Gene_Level_Analysis.tsv": args.gene_analysis_out,
                "SnvGet Feature Description.xls": args.snv_features_out,
                "error.txt": args.error_file_out
                }
    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err("Unable to download from the server")
|
|
220
|
|
# Run the command-line interface only when executed as a script; the program
# name (argv[0]) is not passed on to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
|