comparison tools/chasm/chasm_web.py @ 1:8eaaa7f6b619

Move files to tools
author Saket Choudhary <saketkc@gmail.com>
date Fri, 01 Nov 2013 02:07:53 +0530
parents
children
comparison
equal deleted inserted replaced
0:aea1a2363a94 1:8eaaa7f6b619
1 #!/usr/bin/python
2 """
3 The MIT License (MIT)
4
5 Copyright (c) 2013 Saket Choudhary, <saketkc@gmail.com>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 THE SOFTWARE.
24
25 """
26 import requests
27 import argparse
28 import sys
29 import time
30 from functools import wraps
31 import simplejson as json
32 import zipfile
33 import tempfile, ntpath, shutil
34 import xlrd
35 import csv
36 import os
# NOTE(review): not referenced anywhere else in the visible code; presumably
# maps a worksheet index to the CSV file name used for it -- confirm before
# relying on it.
sheet_map = {
    0: "Variant_Analysis.csv",
    1: "Amino_Acid_Level_Analysis.csv",
    2: "Gene_Level_Analysis.csv",
}
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator; the wrapped function returns whatever the
        decorated function returns, and re-raises the exception of the
        last attempt once ``tries`` attempts have been used up
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # The first ``tries - 1`` attempts swallow ExceptionToCheck and
            # wait; any other exception type propagates immediately.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck:
                    msg = "Retrying in %d seconds..." % mdelay
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call form prints identically on
                        # Python 2 and Python 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt is made outside the try so its exception reaches
            # the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
# Cancer-type identifiers; used as the allowed choices of the --cancertype
# command-line option. The spellings are sent to the server as-is, so they
# are kept exactly as they were.
CANCERTYPES = [
    "Bladder",
    "Blood-Lymphocyte",
    "Blood-Myeloid",
    "Brain-Cerebellum",
    "Brain-Glioblastoma_Multiforme",
    "Brain-Lower_Grade_Glioma",
    "Breast",
    "Cervix",
    "Colon",
    "Head_and_Neck",
    "Kidney-Chromophobe",
    "Kidney-Clear_Cell",
    "Kidney-Papiallary_Cell",
    "Liver-Nonviral",
    "Liver-Viral",
    "Lung-Adenocarcinoma",
    "Lung-Squamous_Cell",
    "Melanoma",
    "Other",
    "Ovary",
    "Pancreas",
    "Prostate-Adenocarcinoma",
    "Rectum",
    "Skin",
    "Stomach",
    "Thyroid",
    "Uterus",
]

# Endpoint that job submissions are POSTed to.
URL = "http://www.cravat.us/ClassifierSelect1"
def stop_err(msg):
    """Write *msg* plus a newline to stderr and terminate the process.

    Exits with status 1 so that callers of the script can tell the run
    failed; a bare ``sys.exit()`` would report success (status 0).

    :param msg: text of the error message
    :raises SystemExit: always
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
85
class CHASMWeb:
    """Client for the web service behind ``URL``.

    Typical use: ``make_request()`` submits a job and returns its id,
    ``zip_exists()`` waits until the result archive is available,
    ``download_zip()`` fetches it into a temporary directory and
    ``move_files()`` unpacks it to caller-supplied output paths.
    """

    def __init__(self, mutationbox=None, filepath=None, is_hg_18=None,
                 analysis_type=None, analysis_program=None, chosendb=None,
                 cancer_type=None, email=None, annotate_genes=None,
                 text_reports=None, mupit_out=None):
        """Store the submission parameters; no network access happens here.

        Either ``mutationbox`` (variants as text) or ``filepath`` (path of a
        file to upload) must be set before ``make_request()`` is called.
        """
        self.mutationbox = mutationbox
        self.filepath = filepath
        self.is_hg_18 = is_hg_18
        self.analysis_type = analysis_type
        self.analysis_program = analysis_program
        self.chosendb = chosendb
        self.cancer_type = cancer_type
        self.email = email
        self.annotate_genes = annotate_genes
        self.text_reports = text_reports
        self.mupit_input = mupit_out
        # Both are filled in by download_zip().
        self.tmp_dir = None
        self.path = None

    def make_request(self):
        """Submit the job and return the ``jobId`` from the JSON reply.

        Form fields whose value is ``None`` or ``False`` are left out and
        ``True`` is sent as ``"on"``.

        :raises ValueError: if neither ``mutationbox`` nor ``filepath`` is set
        """
        # ``chosendb`` used to be filled from ``analysis_program`` only, so a
        # value passed as ``chosendb`` was never sent. Prefer ``chosendb`` and
        # fall back to ``analysis_program`` for existing callers.
        chosendb = self.chosendb
        if chosendb is None:
            chosendb = self.analysis_program
        data = {
            "mutationbox": self.mutationbox,
            "hg18": self.is_hg_18,
            "analysistype": self.analysis_type,
            "chosendb": chosendb,
            "cancertype": self.cancer_type,
            "geneannotcheckbox": self.annotate_genes,
            "emailbox": self.email,
            "tsvreport": self.text_reports,
            "mupitinput": self.mupit_input,
        }
        stripped_data = {}
        for key, value in data.items():
            if value is None or value is False:
                continue
            stripped_data[key] = "on" if value is True else value

        if not self.mutationbox:
            if not self.filepath:
                raise ValueError(
                    "either mutationbox or filepath must be provided")
            # Binary mode, and closed as soon as the upload is done.
            with open(self.filepath, "rb") as input_file:
                request = requests.post(URL, data=stripped_data,
                                        files={"inputfile": input_file})
        else:
            # NOTE(review): the dummy ``files`` entry presumably forces a
            # multipart/form-data body even without an upload -- confirm.
            request = requests.post(URL, data=stripped_data,
                                    files=dict(foo='bar'))
        job_id = json.loads(request.text)["jobId"]
        return job_id

    @retry(requests.exceptions.HTTPError)
    def zip_exists(self, job_id):
        """Return the URL of the result archive for *job_id*.

        A 404 response raises ``HTTPError``, which the ``retry`` decorator
        turns into another attempt after a delay; the error of the last
        attempt propagates. Any other status code returns the URL.
        """
        url = "http://www.cravat.us/results/%s/%s.zip" % (job_id, job_id)
        zip_download_request = requests.request("GET", url)
        if zip_download_request.status_code == 404:
            raise requests.exceptions.HTTPError()
        return url

    def download_zip(self, url, job_id):
        """Download *url* to ``<new temp dir>/<job_id>.zip``.

        :returns: path of the downloaded file, or ``None`` when the server
            did not answer with status 200 (the temp dir is removed then)
        """
        self.tmp_dir = tempfile.mkdtemp()
        r = requests.get(url, stream=True)
        try:
            if r.status_code == 200:
                self.path = os.path.join(self.tmp_dir, job_id + ".zip")
                with open(self.path, 'wb') as f:
                    for chunk in r.iter_content(8192):
                        f.write(chunk)
            else:
                # Nothing was downloaded: do not leave the directory behind.
                shutil.rmtree(self.tmp_dir, ignore_errors=True)
                self.tmp_dir = None
                self.path = None
        finally:
            r.close()
        return self.path

    def move_files(self, file_map):
        """Unpack the downloaded archive and delete the temporary directory.

        * any ``.txt`` member is copied to ``file_map["error.txt"]``;
        * any other non-``.xls`` member is copied to ``file_map[<basename>]``;
          members without a (non-empty) entry in *file_map* are skipped;
        * ``SnvGet Feature Description.xls`` is converted to one CSV file per
          worksheet (see ``_write_sheets_as_csv``);
        * other ``.xls`` members are ignored.

        :param file_map: dict mapping archive base names to output paths
        """
        try:
            with zipfile.ZipFile(self.path) as zip_files:
                for member in zip_files.namelist():
                    filename = ntpath.basename(member)
                    extension = ntpath.splitext(filename)[-1]
                    if extension == ".txt":
                        self._extract_member(zip_files, member,
                                             file_map.get("error.txt"))
                    elif extension != ".xls":
                        self._extract_member(zip_files, member,
                                             file_map.get(filename))
                    elif filename == "SnvGet Feature Description.xls":
                        # xlrd cannot read from a file object; hand it the
                        # raw bytes of the member instead.
                        self._write_sheets_as_csv(zip_files.read(member))
        finally:
            # The archive is closed by now, so the directory can be removed
            # safely; this also runs when unpacking failed.
            if self.tmp_dir:
                shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @staticmethod
    def _extract_member(archive, member, destination):
        """Copy *member* of *archive* to *destination*; no-op if it is falsy."""
        if not destination:
            return
        with archive.open(member) as source_file:
            with open(destination, "wb") as target_file:
                shutil.copyfileobj(source_file, target_file)

    @staticmethod
    def _write_sheets_as_csv(workbook_bytes):
        """Write every worksheet of an in-memory .xls workbook as a CSV file.

        Files are created in the current working directory and named after
        the worksheet.
        """
        with xlrd.open_workbook(file_contents=workbook_bytes) as wb:
            for sheet_name in wb.sheet_names():
                sh = wb.sheet_by_name(sheet_name)
                # NOTE(review): the original called ``replace(" ")`` with the
                # replacement missing; dropping the spaces is assumed to be
                # the intent ("shortened") -- confirm.
                csv_name = sheet_name.replace(" ", "").strip() + ".csv"
                # The csv module wants a binary file on Python 2 and a text
                # file opened with newline="" on Python 3.
                if sys.version_info[0] < 3:
                    f = open(csv_name, "wb")
                else:
                    f = open(csv_name, "w", newline="")
                with f:
                    c = csv.writer(f)
                    for r in range(sh.nrows):
                        c.writerow(sh.row_values(r))
def main(params):
    """Command-line entry point.

    Parses *params*, submits the job, waits for the result archive,
    downloads it and copies the result files to the output paths given on
    the command line. Terminates through ``stop_err`` when the archive
    cannot be downloaded.

    :param params: list of command-line arguments without the program name
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=str, dest="mutationbox", help="Input variants")
    parser.add_argument("--path", type=str, dest="input_file_location", help="Input file location")
    parser.add_argument("--hg18", dest="hg18", action="store_true")
    parser.add_argument("--analysis_type", dest="analysis_type", type=str, choices=["driver", "functional", "geneannotationonly"], default="driver")
    parser.add_argument("--chosendb", dest="chosendb", type=str, nargs="*", choices=["CHASM", "SnvGet"], default="CHASM")
    parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True)
    parser.add_argument("--email", dest="email", required=True, type=str)
    parser.add_argument("--annotate", dest="annotate", action="store_true", default=None)
    parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None)
    parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None)
    parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True)
    parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True)
    parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True)
    parser.add_argument("--error_file", dest="error_file_out", type=str, required=True)
    parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False)
    parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False)
    args = parser.parse_args(params)

    # Without either of these there is nothing to submit; fail with a usage
    # message here instead of crashing later while opening the input file.
    if not args.mutationbox and not args.input_file_location:
        parser.error("one of -i/--input or --path is required")

    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
                         filepath=args.input_file_location,
                         is_hg_18=args.hg18,
                         analysis_type=args.analysis_type,
                         chosendb=args.chosendb,
                         cancer_type=args.cancer_type,
                         email=args.email,
                         annotate_genes=args.annotate,
                         text_reports=args.tsv_report,
                         mupit_out=args.mupit_out)
    job_id = chasm_web.make_request()
    # Names of the files inside the result archive -> where to write them.
    file_map = {
        "Amino_Acid_Level_Analysis.tsv": args.amino_acid_level_analysis_out,
        "SNVBox.tsv": args.snv_box_out,
        "Variant_Analysis.tsv": args.variant_analysis_out,
        "Gene_Level_Analysis.tsv": args.gene_analysis_out,
        "SnvGet Feature Description.xls": args.snv_features_out,
        "error.txt": args.error_file_out,
    }
    url = chasm_web.zip_exists(job_id)
    download = chasm_web.download_zip(url, job_id)
    if download:
        chasm_web.move_files(file_map=file_map)
    else:
        stop_err("Unable to download from the server")
220
if __name__ == "__main__":
    # Run as a script: pass everything after the program name to main().
    main(sys.argv[1:])