diff tools/chasm/chasm_web.py @ 1:8eaaa7f6b619

Move files to tools
author Saket Choudhary <saketkc@gmail.com>
date Fri, 01 Nov 2013 02:07:53 +0530
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/chasm/chasm_web.py	Fri Nov 01 02:07:53 2013 +0530
@@ -0,0 +1,222 @@
+#!/usr/bin/python
+"""
+The MIT License (MIT)
+
+Copyright (c) 2013 Saket Choudhary, <saketkc@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+"""
+import requests
+import argparse
+import sys
+import time
+from functools import wraps
+import simplejson as json
+import zipfile
+import tempfile, ntpath, shutil
+import xlrd
+import csv
+import os
+sheet_map = {0:"Variant_Analysis.csv",1:"Amino_Acid_Level_Analysis.csv",2:"Gene_Level_Analysis.csv"}
+def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
+    """Retry calling the decorated function using an exponential backoff.
+
+    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+    :param ExceptionToCheck: the exception to check. may be a tuple of
+        exceptions to check
+    :type ExceptionToCheck: Exception or tuple
+    :param tries: number of times to try (not retry) before giving up
+    :type tries: int
+    :param delay: initial delay between retries in seconds
+    :type delay: int
+    :param backoff: backoff multiplier e.g. value of 2 will double the delay
+        each retry
+    :type backoff: int
+    :param logger: logger to use. If None, print
+    :type logger: logging.Logger instance
+    """
+    def deco_retry(f):
+
+        @wraps(f)
+        def f_retry(*args, **kwargs):
+            mtries, mdelay = tries, delay
+            while mtries > 1:
+                try:
+                    return f(*args, **kwargs)
+                except ExceptionToCheck, e:
+                    #msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
+                    msg = "Retrying in %d seconds..." %  (mdelay)
+                    if logger:
+                        logger.warning(msg)
+                    else:
+                        print msg
+                    time.sleep(mdelay)
+                    mtries -= 1
+                    mdelay *= backoff
+            return f(*args, **kwargs)
+
+        return f_retry  # true decorator
+
+    return deco_retry
+CANCERTYPES =["Bladder","Blood-Lymphocyte","Blood-Myeloid","Brain-Cerebellum","Brain-Glioblastoma_Multiforme","Brain-Lower_Grade_Glioma","Breast","Cervix","Colon","Head_and_Neck","Kidney-Chromophobe","Kidney-Clear_Cell","Kidney-Papiallary_Cell","Liver-Nonviral","Liver-Viral","Lung-Adenocarcinoma","Lung-Squamous_Cell","Melanoma","Other","Ovary","Pancreas","Prostate-Adenocarcinoma","Rectum","Skin","Stomach","Thyroid","Uterus"]
+URL="http://www.cravat.us/ClassifierSelect1"
+def stop_err( msg ):
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit()
+
+class CHASMWeb:
+    def __init__( self, mutationbox=None, filepath=None,is_hg_18=None, analysis_type=None, analysis_program=None, chosendb=None, cancer_type=None, email=None, annotate_genes=None, text_reports=None, mupit_out=None ):
+        self.mutationbox = mutationbox
+        self.filepath = filepath
+        self.is_hg_18 = is_hg_18
+        self.analysis_type = analysis_type
+        self.analysis_program = analysis_program
+        self.chosendb = chosendb
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.cancer_type = cancer_type
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.text_reports=text_reports
+        self.mupit_input= mupit_out
+    def make_request( self ):
+        data = {
+                "mutationbox":self.mutationbox,
+                "hg18": self.is_hg_18,
+                "analysistype": self.analysis_type,
+                "chosendb": self.analysis_program,
+                "cancertype": self.cancer_type,
+                "geneannotcheckbox": self.annotate_genes,
+                "emailbox": self.email,
+                "tsvreport": self.text_reports,
+                "mupitinput": self.mupit_input,
+                }
+        stripped_data = {}
+
+        for key,value in data.iteritems():
+            if value==True:
+                value="on"
+            if value!=None and value!=False:
+                stripped_data[key]=value
+        #print stripped_data
+        if not self.mutationbox:
+            file_payload={"inputfile":open(self.filepath)}
+            request = requests.post(URL, data=stripped_data, files=file_payload)
+        else:
+            request = requests.post(URL, data=stripped_data, files=dict(foo='bar'))
+        job_id = json.loads(request.text)["jobId"]
+        return job_id
+    @retry(requests.exceptions.HTTPError)
+    def zip_exists(self,job_id ):
+        url="http://www.cravat.us/results/%s/%s.zip" %(job_id,job_id)
+        zip_download_request = requests.request("GET", url)
+        if zip_download_request.status_code==404:
+            raise requests.HTTPError()
+        else:
+            return url
+    def download_zip( self, url, job_id):
+        self.tmp_dir = tempfile.mkdtemp()
+        r = requests.get( url, stream=True )
+        if r.status_code == 200:
+            self.path = os.path.join( self.tmp_dir,job_id+".zip" )
+            with open(self.path, 'wb') as f:
+                for chunk in r.iter_content(128):
+                    f.write(chunk)
+        else:
+            self.path = None
+        return self.path
+
+    def move_files( self, file_map ):
+        fh = open(self.path,"rb")
+        zip_files = zipfile.ZipFile(fh)
+        for name in zip_files.namelist():
+            filename = ntpath.basename(name)
+            extension = ntpath.splitext(filename)[-1]
+            source_file = zip_files.open(name)
+            if extension==".txt":
+                target_file = open(file_map["error.txt"],"wb")
+            elif filename!="SnvGet Feature Description.xls" and extension!=".xls":
+                target_file = open(file_map[filename],"wbb")#file(os.path.join(output_dir,filename),"wb")
+            else:
+                target_file=None
+            if target_file:
+                with source_file,target_file:
+                    shutil.copyfileobj(source_file,target_file)
+            if filename=="SnvGet Feature Description.xls":
+                with xlrd.open_workbook(source_file) as wb:
+                    sheet_names = wb.sheet_names()
+                    for name  in sheet_names:
+                        sh=wb.sheet_by_name(name)
+                        name_shortened= name.replace(" ").strip()+".csv"
+                        with open(name_shortened,'wb') as f:
+                            c = csv.writer(f)
+                            for r in range(sh.nrows):
+                                c.writerow(sh.row_values(r))
+        shutil.rmtree(self.tmp_dir)
+        fh.close()
+def main(params):
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i","--input", type=str, dest="mutationbox", help="Input variants")
+    parser.add_argument("--path",type=str,dest="input_file_location", help="Input file location")
+    parser.add_argument("--hg18", dest="hg18", action="store_true")
+    parser.add_argument("--analysis_type",dest="analysis_type",type=str,choices=["driver","functional","geneannotationonly"], default="driver")
+    parser.add_argument("--chosendb",dest="chosendb",type=str,nargs="*",choices=["CHASM","SnvGet"], default="CHASM")
+    parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True)
+    parser.add_argument("--email", dest="email", required=True, type=str)
+    parser.add_argument("--annotate", dest="annotate", action="store_true", default=None )
+    parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None )
+    parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None )
+    parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True)
+    parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True)
+    parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True,)
+    parser.add_argument("--error_file", dest="error_file_out", type=str, required=True)
+    parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False)
+    parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False)
+    args = (parser.parse_args(params))
+    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
+                         filepath=args.input_file_location,
+                         is_hg_18 = args.hg18,
+                         analysis_type=args.analysis_type,
+                         chosendb = args.chosendb,
+                         cancer_type = args.cancer_type,
+                         email=args.email,
+                         annotate_genes=args.annotate,
+                         text_reports=args.tsv_report,
+                         mupit_out=args.mupit_out)
+    job_id=chasm_web.make_request()
+    file_map = {"Amino_Acid_Level_Analysis.tsv":args.amino_acid_level_analysis_out,
+                "SNVBox.tsv":args.snv_box_out,
+                "Variant_Analysis.tsv":args.variant_analysis_out,
+                "Gene_Level_Analysis.tsv":args.gene_analysis_out,
+                "SnvGet Feature Description.xls":args.snv_features_out,
+                "error.txt":args.error_file_out
+                }
+    url = chasm_web.zip_exists(job_id)
+    download = chasm_web.download_zip(url,job_id)
+    if download:
+        move = chasm_web.move_files(file_map=file_map)
+    else:
+        stop_err("Unable to download from the server")
+
+if __name__=="__main__":
+    main(sys.argv[1:])