changeset 0:aea1a2363a94

Initial commit
author Saket Choudhary <saketkc@gmail.com>
date Fri, 01 Nov 2013 02:05:02 +0530
parents
children 8eaaa7f6b619
files chasm/README.rst chasm/chasm_web.py chasm/chasm_web.xml
diffstat 3 files changed, 354 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chasm/README.rst	Fri Nov 01 02:05:02 2013 +0530
@@ -0,0 +1,38 @@
+Galaxy wrapper for the CHASM webservice at CRAVAT(v2.0)
+=======================================================
+
+This tool is copyright 2013 by Saket Choudhary, Indian Institute of Technology Bombay.
+All rights reserved. MIT licensed.
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Citations
+===========
+
+
+If you use this Galaxy tool in work leading to a scientific publication, please cite:
+
+Carter, Hannah, et al. "Cancer-specific high-throughput annotation of somatic mutations: computational prediction of driver missense mutations."
+Cancer research 69.16 (2009): 6660-6667.
+
+Wong, Wing Chung, et al. "CHASM and SNVBox: toolkit for detecting biologically important single nucleotide mutations in cancer."
+Bioinformatics 27.15 (2011): 2147-2148.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chasm/chasm_web.py	Fri Nov 01 02:05:02 2013 +0530
@@ -0,0 +1,222 @@
+#!/usr/bin/python
+"""
+The MIT License (MIT)
+
+Copyright (c) 2013 Saket Choudhary, <saketkc@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+"""
+import requests
+import argparse
+import sys
+import time
+from functools import wraps
+import simplejson as json
+import zipfile
+import tempfile, ntpath, shutil
+import xlrd
+import csv
+import os
+# Maps a zero-based index to a CSV file name.
+# NOTE(review): no code visible in this file reads sheet_map; presumably it was
+# meant for naming the sheets exported from the results workbook -- confirm
+# before relying on or removing it.
+sheet_map = {0:"Variant_Analysis.csv",1:"Amino_Acid_Level_Analysis.csv",2:"Gene_Level_Analysis.csv"}
+def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
+    """Retry calling the decorated function using an exponential backoff.
+
+    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+    :param ExceptionToCheck: the exception to check. may be a tuple of
+        exceptions to check
+    :type ExceptionToCheck: Exception or tuple
+    :param tries: number of times to try (not retry) before giving up
+    :type tries: int
+    :param delay: initial delay between retries in seconds
+    :type delay: int
+    :param backoff: backoff multiplier e.g. value of 2 will double the delay
+        each retry
+    :type backoff: int
+    :param logger: logger to use. If None, print
+    :type logger: logging.Logger instance
+    """
+    def deco_retry(f):
+
+        @wraps(f)
+        def f_retry(*args, **kwargs):
+            mtries, mdelay = tries, delay
+            while mtries > 1:
+                try:
+                    return f(*args, **kwargs)
+                except ExceptionToCheck, e:
+                    #msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
+                    msg = "Retrying in %d seconds..." %  (mdelay)
+                    if logger:
+                        logger.warning(msg)
+                    else:
+                        print msg
+                    time.sleep(mdelay)
+                    mtries -= 1
+                    mdelay *= backoff
+            return f(*args, **kwargs)
+
+        return f_retry  # true decorator
+
+    return deco_retry
+# Values accepted by the --cancertype command-line option (used as argparse
+# ``choices`` in main) and sent to the server as the "cancertype" form field.
+# NOTE(review): "Kidney-Papiallary_Cell" looks like a misspelling of
+# "Papillary"; the tool XML uses the same spelling, so confirm what the server
+# accepts before changing it.
+CANCERTYPES =["Bladder","Blood-Lymphocyte","Blood-Myeloid","Brain-Cerebellum","Brain-Glioblastoma_Multiforme","Brain-Lower_Grade_Glioma","Breast","Cervix","Colon","Head_and_Neck","Kidney-Chromophobe","Kidney-Clear_Cell","Kidney-Papiallary_Cell","Liver-Nonviral","Liver-Viral","Lung-Adenocarcinoma","Lung-Squamous_Cell","Melanoma","Other","Ovary","Pancreas","Prostate-Adenocarcinoma","Rectum","Skin","Stomach","Thyroid","Uterus"]
+# Endpoint that job submissions are POSTed to (see CHASMWeb.make_request).
+URL="http://www.cravat.us/ClassifierSelect1"
+def stop_err( msg ):
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit()
+
+class CHASMWeb:
+    def __init__( self, mutationbox=None, filepath=None,is_hg_18=None, analysis_type=None, analysis_program=None, chosendb=None, cancer_type=None, email=None, annotate_genes=None, text_reports=None, mupit_out=None ):
+        self.mutationbox = mutationbox
+        self.filepath = filepath
+        self.is_hg_18 = is_hg_18
+        self.analysis_type = analysis_type
+        self.analysis_program = analysis_program
+        self.chosendb = chosendb
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.cancer_type = cancer_type
+        self.email = email
+        self.annotate_genes = annotate_genes
+        self.text_reports=text_reports
+        self.mupit_input= mupit_out
+    def make_request( self ):
+        data = {
+                "mutationbox":self.mutationbox,
+                "hg18": self.is_hg_18,
+                "analysistype": self.analysis_type,
+                "chosendb": self.analysis_program,
+                "cancertype": self.cancer_type,
+                "geneannotcheckbox": self.annotate_genes,
+                "emailbox": self.email,
+                "tsvreport": self.text_reports,
+                "mupitinput": self.mupit_input,
+                }
+        stripped_data = {}
+
+        for key,value in data.iteritems():
+            if value==True:
+                value="on"
+            if value!=None and value!=False:
+                stripped_data[key]=value
+        #print stripped_data
+        if not self.mutationbox:
+            file_payload={"inputfile":open(self.filepath)}
+            request = requests.post(URL, data=stripped_data, files=file_payload)
+        else:
+            request = requests.post(URL, data=stripped_data, files=dict(foo='bar'))
+        job_id = json.loads(request.text)["jobId"]
+        return job_id
+    @retry(requests.exceptions.HTTPError)
+    def zip_exists(self,job_id ):
+        url="http://www.cravat.us/results/%s/%s.zip" %(job_id,job_id)
+        zip_download_request = requests.request("GET", url)
+        if zip_download_request.status_code==404:
+            raise requests.HTTPError()
+        else:
+            return url
+    def download_zip( self, url, job_id):
+        self.tmp_dir = tempfile.mkdtemp()
+        r = requests.get( url, stream=True )
+        if r.status_code == 200:
+            self.path = os.path.join( self.tmp_dir,job_id+".zip" )
+            with open(self.path, 'wb') as f:
+                for chunk in r.iter_content(128):
+                    f.write(chunk)
+        else:
+            self.path = None
+        return self.path
+
+    def move_files( self, file_map ):
+        fh = open(self.path,"rb")
+        zip_files = zipfile.ZipFile(fh)
+        for name in zip_files.namelist():
+            filename = ntpath.basename(name)
+            extension = ntpath.splitext(filename)[-1]
+            source_file = zip_files.open(name)
+            if extension==".txt":
+                target_file = open(file_map["error.txt"],"wb")
+            elif filename!="SnvGet Feature Description.xls" and extension!=".xls":
+                target_file = open(file_map[filename],"wbb")#file(os.path.join(output_dir,filename),"wb")
+            else:
+                target_file=None
+            if target_file:
+                with source_file,target_file:
+                    shutil.copyfileobj(source_file,target_file)
+            if filename=="SnvGet Feature Description.xls":
+                with xlrd.open_workbook(source_file) as wb:
+                    sheet_names = wb.sheet_names()
+                    for name  in sheet_names:
+                        sh=wb.sheet_by_name(name)
+                        name_shortened= name.replace(" ").strip()+".csv"
+                        with open(name_shortened,'wb') as f:
+                            c = csv.writer(f)
+                            for r in range(sh.nrows):
+                                c.writerow(sh.row_values(r))
+        shutil.rmtree(self.tmp_dir)
+        fh.close()
+def main(params):
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i","--input", type=str, dest="mutationbox", help="Input variants")
+    parser.add_argument("--path",type=str,dest="input_file_location", help="Input file location")
+    parser.add_argument("--hg18", dest="hg18", action="store_true")
+    parser.add_argument("--analysis_type",dest="analysis_type",type=str,choices=["driver","functional","geneannotationonly"], default="driver")
+    parser.add_argument("--chosendb",dest="chosendb",type=str,nargs="*",choices=["CHASM","SnvGet"], default="CHASM")
+    parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True)
+    parser.add_argument("--email", dest="email", required=True, type=str)
+    parser.add_argument("--annotate", dest="annotate", action="store_true", default=None )
+    parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None )
+    parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None )
+    parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True)
+    parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True)
+    parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True,)
+    parser.add_argument("--error_file", dest="error_file_out", type=str, required=True)
+    parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False)
+    parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False)
+    args = (parser.parse_args(params))
+    chasm_web = CHASMWeb(mutationbox=args.mutationbox,
+                         filepath=args.input_file_location,
+                         is_hg_18 = args.hg18,
+                         analysis_type=args.analysis_type,
+                         chosendb = args.chosendb,
+                         cancer_type = args.cancer_type,
+                         email=args.email,
+                         annotate_genes=args.annotate,
+                         text_reports=args.tsv_report,
+                         mupit_out=args.mupit_out)
+    job_id=chasm_web.make_request()
+    file_map = {"Amino_Acid_Level_Analysis.tsv":args.amino_acid_level_analysis_out,
+                "SNVBox.tsv":args.snv_box_out,
+                "Variant_Analysis.tsv":args.variant_analysis_out,
+                "Gene_Level_Analysis.tsv":args.gene_analysis_out,
+                "SnvGet Feature Description.xls":args.snv_features_out,
+                "error.txt":args.error_file_out
+                }
+    url = chasm_web.zip_exists(job_id)
+    download = chasm_web.download_zip(url,job_id)
+    if download:
+        move = chasm_web.move_files(file_map=file_map)
+    else:
+        stop_err("Unable to download from the server")
+
+# When executed as a script, pass the command-line arguments (without the
+# program name) to main().
+if __name__=="__main__":
+    main(sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chasm/chasm_web.xml	Fri Nov 01 02:05:02 2013 +0530
@@ -0,0 +1,94 @@
+<tool id="chasm_webservice" name="CHASM Webservice" version="1.0.0" hidden="false">
+    <requirements>
+        <requirement type="python-module">requests</requirement>
+        <requirement type="python-module">xlrd</requirement>
+    </requirements>
+    <description>CHASM score using CRAVAT webservice</description>
+    <command interpreter="python">
+        chasm_web.py --path $input --analysis_type $analysis_type --cancertype $tissue_type --email $__user_email__ --gene_analysis_out $gene_analysis_out  --variant_analysis_out $variant_analysis_out --amino_acid_level_analysis_out $amino_acid_level_analysis_out  --error_file $error_file
+    </command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="Variants File" />
+        <param name="analysis_type" type="select" label="Choose analysis type" help="
+                Cancer driver analysis predicts whether
+                the submitted variants are cancer drivers.
+                Functional effect analysis predicts whether
+                the submitted variants will have any
+                functional effect on their translated proteins.
+                Annotation only provides
+                GeneCard and PubMed information on
+                the genes containing the submitted variants.">
+            <option value="driver">Cancer driver analysis</option>
+            <option value="functional">Functional effect analysis</option>
+            <option value="geneannotationonly">Annotation only</option>
+        </param>
+
+        <param name="gene_annotation" type="select" label="Include Gene annotation">
+            <option value="no">No</option>
+            <option value="yes">Yes</option>
+        </param>
+
+        <param name="tissue_type" type="select" label="Tissue Type">
+            <option value="Bladder">Bladder</option>
+            <option value="Blood-Lymphocyte">Blood-Lymphocyte</option>
+            <option value="Blood-Myeloid">Blood-Myeloid</option>
+            <option value="Brain-Cerebellum">Brain-Cerebellum</option>
+            <option value="Brain-Glioblastoma_Multiforme">Brain-Glioblastoma_Multiforme</option>
+            <option value="Brain-Lower_Grade_Glioma">Brain-Lower_Grade_Glioma</option>
+            <option value="Breast">Breast</option>
+            <option value="Cervix">Cervix</option>
+            <option value="Colon">Colon</option>
+            <option value="Head_and_Neck">Head_and_Neck</option>
+            <option value="Kidney-Chromophobe">Kidney-Chromophobe</option>
+            <option value="Kidney-Clear_Cell">Kidney-Clear_Cell</option>
+            <option value="Kidney-Papiallary_Cell">Kidney-Papiallary_Cell</option>
+            <option value="Liver-Nonviral">Liver-Nonviral</option>
+            <option value="Liver-Viral">Liver-Viral</option>
+            <option value="Lung-Adenocarcinoma">Lung-Adenocarcinoma</option>
+            <option value="Lung-Squamous_Cell">Lung-Squamous_Cell</option>
+            <option value="Melanoma">Melanoma</option>
+            <option value="Other">Other</option>
+            <option value="Ovary">Ovary</option>
+            <option value="Pancreas">Pancreas</option>
+            <option value="Prostate-Adenocarcinoma">Prostate-Adenocarcinoma</option>
+            <option value="Rectum">Rectum</option>
+            <option value="Skin">Skin</option>
+            <option value="Stomach">Stomach</option>
+            <option value="Thyroid">Thyroid</option>
+            <option value="Uterus">Uterus</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="gene_analysis_out"/>
+        <data format="tabular" name="variant_analysis_out" />
+        <data format="tabular" name="amino_acid_level_analysis_out" />
+        <data format="tabular" name="error_file"/>
+    </outputs>
+    <help>
+        **What it does**
+        * CHASM (Cancer-specific High-throughput Annotation of Somatic Mutations) is a method that predicts the functional significance of somatic missense variants
+        observed in the genomes of cancer cells, allowing variants to be prioritized in subsequent functional studies, based on the probability that they confer
+        increased fitness to a cancer cell. CHASM uses a machine learning method called Random Forest to distinguish between driver and passenger somatic missense variation.
+        The Random Forest is trained on a positive class of drivers curated from the COSMIC database and a negative class of passengers, generated in silico,
+        according to passenger base substitution frequencies estimated for a specific tumor type. Each variant is represented by a list of features,
+        including amino acid substitution properties, alignment-based estimates of conservation at the variant position, predicted local structure and annotations from
+        the UniProt Knowledgebase. Only missense mutations are analyzed by CHASM.  For more information on CHASM, please visit http://wiki.chasmsoftware.org
+
+        * SNVGet retrieves selected predictive features for a variant. Features can be broadly categorized into 3 types:
+            - Amino Acid Substitution features
+            - Protein-based position-specific features
+            - Exon-specific features
+            Only missense mutations are analyzed by SNVGet. For more information on SNVBox (database made with SNVGet), please visit http://wiki.chasmsoftware.org
+        * VEST is a method that predicts the functional effect of a variant.
+
+
+
+        **Citation**
+        If you use this Galaxy tool in work leading to a scientific publication please cite:
+
+        Carter, Hannah, et al. "Cancer-specific high-throughput annotation of somatic mutations: computational prediction of driver missense mutations."
+        Cancer research 69.16 (2009): 6660-6667.
+
+        Wong, Wing Chung, et al. "CHASM and SNVBox: toolkit for detecting biologically important single nucleotide mutations in cancer."
+        Bioinformatics 27.15 (2011): 2147-2148.
+    </help>
+</tool>