# HG changeset patch # User saketkc # Date 1397517371 14400 # Node ID b9de63c7255939162d16c5fe514a55195131f739 # Parent 8aceb7fc57f5a9cad5e5de4958f0887ec35c9ad8 Uploaded diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/README.rst Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,34 @@ +Galaxy wrapper for the CHASM webservice +=================================================== + +This tool is copyright 2014 by Saket Choudhary, Indian Institute of Technology Bombay +All rights reserved. MIT licensed. + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Citations +=========== + + +If you use this Galaxy tool in work leading to a scientific publication please cite: + +Douville C, Carter H, Kim R, Niknafs N, Diekhans M, Stenson PD, Cooper DN, Ryan M, Karchin R (2013). CRAVAT: Cancer-Related Analysis of VAriants Toolkit Bioinformatics, 29(5):647-648. diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/chasm_webservice.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/chasm_webservice.py Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,279 @@ +#!/usr/bin/python +""" +The MIT License (MIT) + +Copyright (c) 2014 Saket Choudhary, + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +""" +import sys +import requests +import argparse +import time +from functools import wraps +import simplejson as json +import zipfile +import tempfile +import ntpath +import shutil +import xlrd +import csv +import os +sheet_map = {0: 'Variant_Analysis.csv', + 1: 'Amino_Acid_Level_Analysis.csv', 2: 'Gene_Level_Analysis.csv'} + + +def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None): + '''Retry calling the decorated function using an exponential backoff. + + http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ + original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry + + :param ExceptionToCheck: the exception to check. may be a tuple of + exceptions to check + :type ExceptionToCheck: Exception or tuple + :param tries: number of times to try (not retry) before giving up + :type tries: int + :param delay: initial delay between retries in seconds + :type delay: int + :param backoff: backoff multiplier e.g. value of 2 will double the delay + each retry + :type backoff: int + :param logger: logger to use. If None, print + :type logger: logging.Logger instance + ''' + def deco_retry(f): + + @wraps(f) + def f_retry(*args, **kwargs): + mtries, mdelay = tries, delay + while mtries > 1: + try: + return f(*args, **kwargs) + except ExceptionToCheck, e: + #msg = '%s, Retrying in %d seconds...' % (str(e), mdelay) + msg = 'Retrying in %d seconds...' % (mdelay) + if logger: + logger.warning(msg) + else: + print msg + time.sleep(mdelay) + mtries -= 1 + mdelay *= backoff + return f(*args, **kwargs) + + return f_retry # true decorator + + return deco_retry +CANCERTYPES = ['Bladder', 'Blood-Lymphocyte', 'Blood-Myeloid', + 'Brain-Cerebellum', 'Brain-Glioblastoma_Multiforme', + 'Brain-Lower_Grade_Glioma', 'Breast', 'Cervix', + 'Colon', 'Head_and_Neck', 'Kidney-Chromophobe', + 'Kidney-Clear_Cell', 'Kidney-Papiallary_Cell', + 'Liver-Nonviral', 'Liver-Viral', 'Lung-Adenocarcinoma', + 'Lung-Squamous_Cell', 'Melanoma', 'Other', 'Ovary', + 'Pancreas', 'Prostate-Adenocarcinoma', 'Rectum', + 'Skin', 'Stomach', 'Thyroid', 'Uterus'] + +__URL__ = 'http://www.cravat.us/rest/service/submit' + + +def stop_err(msg): + sys.stderr.write('%s\n' % msg) + sys.exit() + + +class CHASMWeb: + + def __init__(self, + mutationbox=None, filepath=None, + is_hg_18=None, analysis_type=None, + analysis_program=None, chosendb=None, + cancer_type=None, email=None, + annotate_genes=None, text_reports=None, + mupit_out=None): + self.mutationbox = mutationbox + self.filepath = filepath + self.is_hg_18 = is_hg_18 + self.analysis_type = analysis_type + self.analysis_program = analysis_program + self.chosendb = chosendb + self.email = email + self.annotate_genes = annotate_genes + self.cancer_type = cancer_type + self.email = email + self.annotate_genes = annotate_genes + self.text_reports = text_reports + self.mupit_input = mupit_out + + def make_request(self): + data = { + 'mutations ': self.mutationbox, + 'hg18': self.is_hg_18, + 'analysistype': self.analysis_type, + 'analysisitem': self.analysis_program, + 'chasmclassifier': self.cancer_type, + 'geneannotation': self.annotate_genes, + 'email': self.email, + 'tsvreport': 'on', # self.text_reports, + 'mupitinput': self.mupit_input, + } + stripped_data = {} + + for key, value in data.iteritems(): + if value is True: + value = 'on' + if value is not None and value is not False: + stripped_data[key] = value + + if not self.mutationbox: + file_payload = {'inputfile': open(self.filepath)} + request = requests.post( + __URL__, data=stripped_data, files=file_payload) + else: + request = requests.post( + __URL__, data=stripped_data, files=dict(foo='bar')) + job_id = json.loads(request.text)['jobid'] + return job_id + + @retry(requests.exceptions.HTTPError) + def zip_exists(self, job_id): + url = 'http://www.cravat.us/results/%s/%s.zip' % (job_id, job_id) + zip_download_request = requests.request('GET', url) + if zip_download_request.status_code == 404: + raise requests.HTTPError() + else: + return url + + def download_zip(self, url, job_id): + self.tmp_dir = tempfile.mkdtemp() + r = requests.get(url, stream=True) + if r.status_code == 200: + self.path = os.path.join(self.tmp_dir, job_id + '.zip') + with open(self.path, 'wb') as f: + for chunk in r.iter_content(128): + f.write(chunk) + else: + self.path = None + return self.path + + def move_files(self, file_map): + fh = open(self.path, 'rb') + zip_files = zipfile.ZipFile(fh) + for name in zip_files.namelist(): + filename = ntpath.basename(name) + extension = ntpath.splitext(filename)[-1] + source_file = zip_files.open(name) + if extension == '.txt': + target_file = open(file_map['error.txt'], 'wb') + elif filename != 'SnvGet Feature Description.xls' and extension != '.xls': + target_file = open(file_map[filename], 'wbb') + else: + target_file = None + if target_file: + with source_file, target_file: + shutil.copyfileobj(source_file, target_file) + if filename == 'SnvGet Feature Description.xls': + with xlrd.open_workbook(source_file) as wb: + sheet_names = wb.sheet_names() + for name in sheet_names: + sh = wb.sheet_by_name(name) + name_shortened = name.replace(' ').strip() + '.csv' + with open(name_shortened, 'wb') as f: + c = csv.writer(f) + for r in range(sh.nrows): + c.writerow(sh.row_values(r)) + shutil.rmtree(self.tmp_dir) + fh.close() + + +def main(params): + + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input', + type=str, dest='mutationbox', + help='Input variants') + parser.add_argument('--path', type=str, + dest='input_file_location', + help='Input file location') + parser.add_argument('--hg18', dest='hg18', + action='store_true') + parser.add_argument('--analysis_type', dest='analysis_type', + type=str, + choices=['driver', 'functional', + 'geneannotationonly'], + default='driver') + parser.add_argument('--chosendb', dest='chosendb', + type=str, nargs='*', + choices=['CHASM', 'SnvGet'], + default='CHASM') + parser.add_argument('--cancertype', dest='cancer_type', + type=str, choices=CANCERTYPES, + required=True) + parser.add_argument('--email', dest='email', + required=True, type=str) + parser.add_argument('--annotate', dest='annotate', + action='store_true', default=None) + parser.add_argument('--tsv_report', dest='tsv_report', + action='store_true', default=None) + parser.add_argument('--mupit_out', dest='mupit_out', + action='store_true', default=None) + parser.add_argument('--gene_analysis_out', dest='gene_analysis_out', + type=str, required=True) + parser.add_argument('--variant_analysis_out', + dest='variant_analysis_out', + type=str, required=True) + parser.add_argument('--amino_acid_level_analysis_out', + dest='amino_acid_level_analysis_out', + type=str, required=True,) + parser.add_argument('--error_file', dest='error_file_out', + type=str, required=True) + parser.add_argument('--snv_box_out', dest='snv_box_out', + type=str, required=False) + parser.add_argument('--snv_features', dest='snv_features_out', + type=str, required=False) + args = parser.parse_args(params) + chasm_web = CHASMWeb(mutationbox=args.mutationbox, + filepath=args.input_file_location, + is_hg_18=args.hg18, + analysis_type=args.analysis_type, + chosendb=args.chosendb, + cancer_type=args.cancer_type, + email=args.email, + annotate_genes=args.annotate, + text_reports=args.tsv_report, + mupit_out=args.mupit_out) + job_id = chasm_web.make_request() + file_map = {'Amino_Acid_Level_Analysis.Result.tsv': args.amino_acid_level_analysis_out, + 'SNVBox.tsv': args.snv_box_out, + 'Variant_Analysis.Result.tsv': args.variant_analysis_out, + 'Gene_Level_Analysis.Result.tsv': args.gene_analysis_out, + 'SnvGet Feature Description.xls': args.snv_features_out, + 'error.txt': args.error_file_out + } + url = chasm_web.zip_exists(job_id) + download = chasm_web.download_zip(url, job_id) + if download: + chasm_web.move_files(file_map=file_map) + else: + stop_err('Unable to download from the server') + +if __name__ == '__main__': + main(sys.argv[1:]) diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/chasm_webservice.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/chasm_webservice.xml Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,288 @@ + diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/chasm_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_input.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,5 @@ +TR1 chr22 30421786 + A T +TR2 chr22 29446079 + A G +TR3 chr22 29446079 + A G +TR4 chr22 40814500 - A G +TR5 chr22 40815256 + C T diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/chasm_output_aminoacids.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_aminoacids.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,13 @@ +#Amino Acid Level Analysis Report +#2014-04-14 18:52:00.963260 +#Analysis done at http://www.cravat.us. +#Input file: dataset_34.dat +#This report shows analysis results at amino acid level. +#Input coordinate was hg19 genomic. +#For more information on CRAVAT, visit http://www.cravat.us. +# +Transcript Amino acid position Sequence Ontology Reference amino acid(s) Alternate amino acid(s) HUGO symbol Other transcripts Occurrences in study [amino acid change] Transcript by COSMIC AA change by COSMIC Occurrences in COSMIC [amino acid change] Occurrences in COSMIC by primary sites [amino acid change] +NM_021090.3 1198 MS N I MTMR3 NM_153051.2:aAc>aTc:N1161I, NM_153050.2:aAc>aTc:N1170I, ENST00000323630:aAc>aTc:N1062I, ENST00000351488:aAc>aTc:N1161I, ENST00000333027:aAc>aTc:N1170I, ENST00000406629:aAc>aTc:N1170I, ENST00000401950:aAc>aTc:N1198I 1 0 +NM_001206998.1 637 MS H R ZNRF3 NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 2 0 +NM_020831.3 648 MS S G MKL1 ENST00000396617:Agc>Ggc:S648G, ENST00000402042:Agc>Ggc:S598G, ENST00000407029:Agc>Ggc:S648G, ENST00000355630:Agc>Ggc:S648G 1 ENST00000355630 p.S648G (stomach 1) 1 stomach(1 ) +NM_020831.3 396 MS A T MKL1 ENST00000396617:Gcc>Acc:A396T, ENST00000402042:Gcc>Acc:A346T, ENST00000407029:Gcc>Acc:A396T, ENST00000355630:Gcc>Acc:A396T 1 0 diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/chasm_output_error.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_error.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,1 @@ +# End of input format error output. If nothing is above this line, there was no format error in the input. \ No newline at end of file diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/chasm_output_genes.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_genes.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,14 @@ +#Gene Level Analysis Report +#2014-04-14 18:52:00.963904 +#Analysis done at http://www.cravat.us. +#Input file: dataset_34.dat +#This report shows analysis results at gene level. +#The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance. +#Input coordinate was hg19 genomic. +#Tissue type for CHASM analysis: Other +#For more information on CRAVAT, visit http://www.cravat.us. +# +HUGO symbol Best driver score from representative transcripts Composite p value Composite FDR Occurrences in study [gene mutated] Occurrences in COSMIC [gene mutated] Occurrences in COSMIC by primary sites [gene mutated] +MTMR3 0.6 0.3364 0.55 1 102 upper_aerodigestive_tract(2);large_intestine(24);haematopoietic_and_lymphoid_tissue(5);endometrium(12);urinary_tract(4);oesophagus(1);liver(3);skin(17);lung(17);ovary(5);prostate(2);kidney(4);breast(6 ) +ZNRF3 0.514 0.171 0.55 2 73 upper_aerodigestive_tract(2);large_intestine(19);autonomic_ganglia(1);haematopoietic_and_lymphoid_tissue(2);endometrium(12);urinary_tract(2);lung(16);liver(5);skin(1);oesophagus(3);ovary(2);NS(1);prostate(2);kidney(3);breast(2 ) +MKL1 0.728 0.793956248886469 1 2 68 large_intestine(15);stomach(2);central_nervous_system(1);haematopoietic_and_lymphoid_tissue(3);endometrium(13);urinary_tract(1);lung(19);breast(3);skin(1);ovary(5);kidney(4);liver(1 ) diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/chasm_output_variants.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/chasm_output_variants.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,15 @@ +#Variant Level Analysis Report +#2014-04-14 18:52:00.964521 +#Analysis done at http://www.cravat.us. +#Input file: dataset_34.dat +#This report shows analysis results at variant level. +#Input coordinate was hg19 genomic. +#Tissue type for CHASM analysis: Other +#For more information on CRAVAT, visit http://www.cravat.us. +# +ID Chromosome Position Strand Reference base Alternate base Sample ID HUGO symbol Transcript Transcript strand Codon change Amino acid position Sequence Ontology Reference amino acid(s) Alternate amino acid(s) Other transcripts Mappability Warning Driver score of representative transcript (driver mutations close to 0) Empirical p-value FDR (Benjamini-Hochberg) (not available with less than 10 unique mutations) Best driver score and transcript All transcripts and driver scores dbSNP 1000 Genomes allele frequency ESP6500 allele frequency (European American) ESP6500 allele frequency (African American) Occurrences in study [exact nucleotide change] Transcript by COSMIC AA change by COSMIC Occurrences in COSMIC [exact nucleotide change] Occurrences in COSMIC by primary sites [exact nucleotide change] +TR1 CHR22 30421786 + A T No_sample_ID MTMR3 NM_021090.3 + aAc>aTc 1198 MS N I NM_153051.2:aAc>aTc:N1161I, NM_153050.2:aAc>aTc:N1170I, ENST00000323630:aAc>aTc:N1062I, ENST00000351488:aAc>aTc:N1161I, ENST00000333027:aAc>aTc:N1170I, ENST00000406629:aAc>aTc:N1170I, ENST00000401950:aAc>aTc:N1198I 0.6(NM_021090.3:N1198I) 0.3364 0.60 0.572(ENST00000323630:N1062I) ENST00000323630:N1062I(0.572),ENST00000406629:N1170I(0.602),ENST00000401950:N1198I(0.6),ENST00000351488:N1161I(0.572),NM_153050.2:N1170I(0.572),ENST00000333027:N1170I(0.572),NM_021090.3:N1198I(0.6),NM_153051.2:N1161I(0.572) rs75623810 0.00778388278388 0 0.0226963 1 +TR2 CHR22 29446079 + A G No_sample_ID ZNRF3 NM_001206998.1 + cAc>cGc 637 MS H R NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 0.514(NM_001206998.1:H637R) 0.1710 0.60 0.442(ENST00000544604:H637R) ENST00000544604:H637R(0.442),NM_032173.3:H537R(0.514),ENST00000402174:H537R(0.514),ENST00000406323:H537R(0.514),NM_001206998.1:H637R(0.514),ENST00000332811:H537R(0.514) rs62641746 0 0.00569767 0.000907853 2 +TR3 CHR22 29446079 + A G No_sample_ID ZNRF3 NM_001206998.1 + cAc>cGc 637 MS H R NM_032173.3:cAc>cGc:H537R, ENST00000406323:cAc>cGc:H537R, ENST00000332811:cAc>cGc:H537R, ENST00000402174:cAc>cGc:H537R, ENST00000544604:cAc>cGc:H637R 0.514(NM_001206998.1:H637R) 0.1710 0.60 0.442(ENST00000544604:H637R) ENST00000544604:H637R(0.442),NM_032173.3:H537R(0.514),ENST00000402174:H537R(0.514),ENST00000406323:H537R(0.514),NM_001206998.1:H637R(0.514),ENST00000332811:H537R(0.514) rs62641746 0 0.00569767 0.000907853 2 +TR4 CHR22 40814500 - A G No_sample_ID MKL1 NM_020831.3 - Agc>Ggc 648 MS S G ENST00000396617:Agc>Ggc:S648G, ENST00000402042:Agc>Ggc:S598G, ENST00000407029:Agc>Ggc:S648G, ENST00000355630:Agc>Ggc:S648G 0.728(NM_020831.3:S648G) 0.6890 0.80 0.708(ENST00000402042:S598G) ENST00000407029:S648G(0.728),NM_020831.3:S648G(0.728),ENST00000396617:S648G(0.728),ENST00000355630:S648G(0.728),ENST00000402042:S598G(0.708) rs878756 0.419871794872 0.396977 0.860645 1 ENST00000355630 p.S648G (stomach 1) 1 stomach(1) +TR5 CHR22 40815256 + C T No_sample_ID MKL1 NM_020831.3 - Gcc>Acc 396 MS A T ENST00000396617:Gcc>Acc:A396T, ENST00000402042:Gcc>Acc:A346T, ENST00000407029:Gcc>Acc:A396T, ENST00000355630:Gcc>Acc:A396T 0.748(NM_020831.3:A396T) 0.7476 0.80 0.748(ENST00000407029:A396T) ENST00000407029:A396T(0.748),NM_020831.3:A396T(0.748),ENST00000396617:A396T(0.748),ENST00000355630:A396T(0.748),ENST00000402042:A346T(0.754) rs34736200 0.0201465201465 0.00127907 0.0832955 1 diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/ma_nucleotide_output.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/ma_nucleotide_output.csv Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,4 @@ +"","Mutation","AA variant","Gene","MSA","PDB","Func. Impact","FI score","Uniprot","Refseq","MSA height","Codon start position","Func. region","Protein bind.site","DNA/RNA bind.site","small.mol bind.site" +"1","hg19,13,32912555,G,T","D1355Y","BRCA2","http://getma.org/?cm=msa&ty=f&p=BRCA2_HUMAN&rb=1247&re=1420&var=D1355Y","","low","1.24","BRCA2_HUMAN","NP_000050","14","chr13:32912555","","","","" +"2","hg19,7,55178574,G,A","","","","","","","","","0","","","","","" +"3","hg19,7,55178574,G,A","","","","","","","","","0","","","","","" diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/ma_proper_nucleotide.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/ma_proper_nucleotide.csv Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,3 @@ +13,32912555,G,T BRCA2 +7,55178574,G,A GBM +7,55178574,G,A GBM diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/ma_proper_protein.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/ma_proper_protein.csv Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,6 @@ +EGFR_HUMAN,R521K +EGFR_HUMAN,R98Q,Polymorphism +EGFR_HUMAN,G719D,disease +NP_000537,G356A +NP_000537,G360A,dbSNP:rs35993958 +NP_000537,S46A,Abolishes,phosphorylation diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/ma_proper_protein.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/ma_proper_protein.tsv Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,6 @@ +EGFR_HUMAN R521K +EGFR_HUMAN R98Q Polymorphism +EGFR_HUMAN G719D disease +NP_000537 G356A +NP_000537 G360A dbSNP:rs35993958 +NP_000537 S46A Abolishes phosphorylation diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/ma_protein_output.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/ma_protein_output.csv Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,7 @@ +"","Mutation","AA variant","Gene","MSA","PDB","Func. Impact","FI score","Uniprot","Refseq","MSA height","Codon start position","Func. region","Protein bind.site","DNA/RNA bind.site","small.mol bind.site" +"1","EGFR_HUMAN R521K","R521K","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=482&re=681&var=R521K","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=482&to=681&var=R521K","neutral","0.405","EGFR_HUMAN","NP_005219","399","chr7:55196748","1","1","","" +"2","EGFR_HUMAN R98Q","R98Q","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=57&re=168&var=R98Q","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=57&to=168&var=R98Q","neutral","0.6","EGFR_HUMAN","NP_005219","181","chr7:55178543","","","","" +"3","EGFR_HUMAN G719D","G719D","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=G719D","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=G719D","medium","3.115","EGFR_HUMAN","NP_005219","700","chr7:55209201","1","1","","0UN IRE FMM ANP CY7 HYZ 03P ITI DKI 685 T95 T74 ZZY M97 6XP 0K0 KRW 0JJ 0K1 P17 112 1N1 JIN STI P5C 585 S19 P16 VX6 P3Y SX7 ACK B90 AMP ZD6 STU 7PY BI9 BII ATP ADP 4ST VG8 YAM P1E 7X4 7X5 7X6 7X8 349 3JZ" +"4","NP_000537 G356A","G356A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=318&re=359&var=G356A","http://getma.org/pdb.php?prot=P53_HUMAN&from=318&to=359&var=G356A","neutral","-0.895","P53_HUMAN","NP_000537","45","chr17:7514684","","1","","" +"5","NP_000537 G360A","G360A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=360&re=393&var=G360A","","medium","2.25","P53_HUMAN","NP_000537","15","chr17:7514672","","","","" +"6","NP_000537 S46A","S46A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=30&re=94&var=S46A","http://getma.org/pdb.php?prot=P53_HUMAN&from=30&to=94&var=S46A","neutral","0.55","P53_HUMAN","NP_000537","5","chr17:7520274","1","1","","" diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/polyphen2_full.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/polyphen2_full.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,23 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 nt1 nt2 prediction based_on effect pph2_class pph2_prob pph2_FPR pph2_TPR pph2_FDR site region PHAT dScore Score1 Score2 MSAv Nobs Nstruct Nfilt PDB_id PDB_pos PDB_ch ident length NormASA SecStr MapReg dVol dProp B-fact H-bonds AveNHet MinDHet AveNInt MinDInt AveNSit MinDSit Transv CodPos CpG MinDJxn PfamHit IdPmax IdPSNP IdQmin +Q13615-2 1170 N I ? Q13615-2 1170 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +2.214 -1.705 -3.919 2 37 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -2313 ? 1.268 ? 47.09 # chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690 +Q13615 1198 N I ? Q13615 1198 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 NO NO ? +2.296 -1.580 -3.876 2 38 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -3099 NO 1.010 ? 45.58 # chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576 +Q13615-3 1161 N I ? Q13615-3 1161 N I A T probably damaging alignment ? deleterious 0.998 0.0112 0.273 0.0274 ? ? ? +2.214 -1.705 -3.919 2 37 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 1 0 -3099 ? 1.275 ? 47.37 # chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691 +Q9ULT6 637 H R ? Q9ULT6 637 H R A G benign alignment ? neutral 0.002 0.704 0.987 0.452 NO NO ? +0.398 -2.258 -2.656 2 47 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 2 +858 NO 20.363 20.363 77.46 # chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549 +Q9ULT6 637 H R ? Q9ULT6 637 H R A G benign alignment ? neutral 0.002 0.704 0.987 0.452 NO NO ? +0.398 -2.258 -2.656 2 47 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 1 2 -1599 NO 20.363 20.363 77.46 # chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549 +Q969V6 648 S C ? Q969V6 648 S C A T possibly damaging alignment ? deleterious 0.89 0.0639 0.821 0.0953 NO COMPBIAS ? +2.837 -1.909 -4.746 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.320 ? 90.33 # chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R A C benign alignment ? neutral 0.167 0.131 0.92 0.162 NO COMPBIAS ? +1.814 -1.909 -3.723 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 2.525 ? 90.33 # chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S C ? Q969V6 648 S C A T possibly damaging alignment ? deleterious 0.89 0.0639 0.821 0.0953 NO COMPBIAS ? +2.837 -1.909 -4.746 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.320 ? 90.33 # chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R A C benign alignment ? neutral 0.167 0.131 0.92 0.162 NO COMPBIAS ? +1.814 -1.909 -3.723 2 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 2.525 ? 90.33 # chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882 +E7ER32 648 S C ? E7ER32 648 S C A T possibly damaging alignment ? deleterious 0.953 0.0514 0.788 0.0812 NO NO ? +2.837 -1.909 -4.746 2 33 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.255 ? 87.22 # chr22:40814500|TA|uc010gye.1-|MKL1| +E7ER32 648 S R ? E7ER32 648 S R A C benign alignment ? neutral 0.337 0.111 0.901 0.142 NO NO ? +1.814 -1.909 -3.723 2 33 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 2.402 ? 87.22 # chr22:40814500|TG|uc010gye.1-|MKL1| +B0QY83 598 S C ? B0QY83 598 S C A T possibly damaging alignment_mz ? deleterious 0.726 0.0797 0.856 0.112 NO NO ? +2.847 -1.931 -4.778 3 31 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 0 +123 NO 1.615 ? 91.49 # chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882 +B0QY83 598 S R ? B0QY83 598 S R A C benign alignment_mz ? neutral 0.047 0.168 0.942 0.195 NO NO ? +1.674 -1.931 -3.605 3 31 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1 0 2 +123 NO 5.560 ? 91.49 # chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 21.659 21.659 88.08 # chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 21.659 21.659 88.08 # chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882 +E7ER32 396 A T ? E7ER32 396 A T G A benign alignment ? neutral 0.009 0.233 0.961 0.247 NO NO ? +0.097 -1.540 -1.637 2 39 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 20.554 20.554 83.58 # chr22:40815256|CT|uc010gye.1-|MKL1| +B0QY83 346 A T ? B0QY83 346 A T G A benign alignment_mz ? neutral 0.008 0.239 0.963 0.252 NO NO ? +0.456 -1.547 -2.003 3 32 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0 0 1 +879 NO 21.940 21.940 89.22 # chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/polyphen2_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/polyphen2_input.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,5 @@ +chr22:30421786 A/T +chr22:29446079 A/G +chr22:40814500 A/G +chr22:40815256 C/T + diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/polyphen2_log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/polyphen2_log.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,38 @@ +=========================== +Stage 1/7: Validating input +=========================== +No errors + +=============================== +Stage 2/7: Mapping genomic SNPs +=============================== +WARNING: (chr22:40814500 - uc003ayv.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc003ayw.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc010gye.1) None of the input alleles (A/G) matches reference allele (T) +WARNING: (chr22:40814500 - uc010gyf.1) None of the input alleles (A/G) matches reference allele (T) +Total errors/warnings: 4 + +============================ +Stage 3/7: Collecting output +============================ +No errors + +=============================================== +Stage 4/7: Building MSA and annotating proteins +=============================================== +No errors + +============================ +Stage 5/7: Collecting output +============================ +No errors + +===================== +Stage 6/7: Predicting +===================== +No errors + +============================= +Stage 7/7: Generating reports +============================= +No errors diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/polyphen2_short.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/polyphen2_short.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,23 @@ +#o_acc o_pos o_aa1 o_aa2 rsid acc pos aa1 aa2 prediction pph2_prob pph2_FPR pph2_TPR +Q13615-2 1170 N I ? Q13615-2 1170 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690 +Q13615 1198 N I ? Q13615 1198 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576 +Q13615-3 1161 N I ? Q13615-3 1161 N I probably damaging 0.998 0.0112 0.273 # chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691 +Q9ULT6 637 H R ? Q9ULT6 637 H R benign 0.002 0.704 0.987 # chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549 +Q9ULT6 637 H R ? Q9ULT6 637 H R benign 0.002 0.704 0.987 # chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549 +Q969V6 648 S C ? Q969V6 648 S C possibly damaging 0.89 0.0639 0.821 # chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R benign 0.167 0.131 0.92 # chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882 +Q969V6 648 S C ? Q969V6 648 S C possibly damaging 0.89 0.0639 0.821 # chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882 +Q969V6 648 S R ? Q969V6 648 S R benign 0.167 0.131 0.92 # chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882 +E7ER32 648 S C ? E7ER32 648 S C possibly damaging 0.953 0.0514 0.788 # chr22:40814500|TA|uc010gye.1-|MKL1| +E7ER32 648 S R ? E7ER32 648 S R benign 0.337 0.111 0.901 # chr22:40814500|TG|uc010gye.1-|MKL1| +B0QY83 598 S C ? B0QY83 598 S C possibly damaging 0.726 0.0797 0.856 # chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882 +B0QY83 598 S R ? B0QY83 598 S R benign 0.047 0.168 0.942 # chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882 +Q969V6 396 A T ? Q969V6 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882 +E7ER32 396 A T ? E7ER32 396 A T benign 0.009 0.233 0.961 # chr22:40815256|CT|uc010gye.1-|MKL1| +B0QY83 346 A T ? B0QY83 346 A T benign 0.008 0.239 0.963 # chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882 +## Sources: +## Predictions: PolyPhen-2 v2.2.2r398 +## Sequences: UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011) +## Structures: PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures) +## Genes: UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009) diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/test-data/polyphen2_snp.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/test-data/polyphen2_snp.txt Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,28 @@ +#snp_pos str gene transcript ccid ccds cciden refa type ntpos nt1 nt2 flanks trv cpg jxdon jxacc exon cexon jxc dgn cdnpos frame cdn1 cdn2 aa1 aa2 aapos spmap spacc spname refs_acc dbrsid dbobsrvd dbavHet dbavHetSE dbRmPaPt comments +chr22:30421786 + MTMR3 uc003agu.3 ? CCDS13871.1 1 A/T missense 142629 A T AC 1 0 -2313 -168 20/20 18/18 ? 0 1170 1 AAC ATC N I 1170 1 Q13615-2 MTMR3_HUMAN NP_694690 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:30421786 + MTMR3 uc003agv.3 16552 CCDS13870.1 1 A/T missense 142629 A T AC 1 0 -3099 -168 20/20 18/18 ? 0 1198 1 AAC ATC N I 1198 1 Q13615 MTMR3_HUMAN NP_066576 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:30421786 + MTMR3 uc003agw.3 ? CCDS46682.1 1 A/T missense 142629 A T AC 1 0 -3099 -168 19/19 17/17 ? 0 1161 1 AAC ATC N I 1161 1 Q13615-3 MTMR3_HUMAN NP_694691 rs75623810 A/T 0.016564 0.089485 A>A>A +chr22:29446079 + ZNRF3 uc003aeg.2 16531 CCDS42999.1 1 A/G missense 166190 A G CC 0 2 +858 -895 8/9 7/8 ? 0 537 1 CAC CGC H R 637 1 Q9ULT6 ZNRF3_HUMAN NP_115549 rs62641746 A/G 0.030762 0.120144 A>A>A +chr22:29446079 + ZNRF3 uc003aeh.1 ? CCDS42999.1 0.982 A/G missense 63040 A G CC 0 2 -1599 -895 7/7 7/7 ? 0 537 1 CAC CGC H R 637 1 Q9ULT6 ZNRF3_HUMAN NP_115549 rs62641746 A/G 0.030762 0.120144 A>A>A +chr22:40814500 - MKL1 uc003ayv.1 ? CCDS14003.1 1 T/A missense 44939 A T CG 1 0 +123 -889 9/12 9/12 ? 0 648 0 AGC TGC S C 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayv.1 ? CCDS14003.1 1 T/G missense 44939 A C CG 1 2 +123 -889 9/12 9/12 ? 0 648 0 AGC CGC S R 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 T/A missense 218191 A T CG 1 0 +123 -889 12/15 9/12 ? 0 648 0 AGC TGC S C 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 T/G missense 218191 A C CG 1 2 +123 -889 12/15 9/12 ? 0 648 0 AGC CGC S R 648 1 Q969V6 MKL1_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc010gye.1 ? ? ? T/A missense 218191 A T CG 1 0 +123 -889 12/15 9/12 ? 0 648 0 AGC TGC S C 648 1 E7ER32 E7ER32_HUMAN ? ? ? ? ? ? +chr22:40814500 - MKL1 uc010gye.1 ? ? ? T/G missense 218191 A C CG 1 2 +123 -889 12/15 9/12 ? 0 648 0 AGC CGC S R 648 1 E7ER32 E7ER32_HUMAN ? ? ? ? ? ? +chr22:40814500 - MKL1 uc010gyf.1 ? ? ? T/A missense 218191 A T CG 1 0 +123 -889 11/14 8/11 ? 0 598 0 AGC TGC S C 598 1 B0QY83 B0QY83_HUMAN NP_065882 ? ? ? ? ? +chr22:40814500 - MKL1 uc010gyf.1 ? ? ? T/G missense 218191 A C CG 1 2 +123 -889 11/14 8/11 ? 0 598 0 AGC CGC S R 598 1 B0QY83 B0QY83_HUMAN NP_065882 ? ? ? ? ? +chr22:40815256 - MKL1 uc003ayv.1 ? CCDS14003.1 1 C/T missense 44183 G A CC 0 1 +879 -133 9/12 9/12 ? 0 396 0 GCC ACC A T 396 1 Q969V6 MKL1_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc003ayw.1 16752 CCDS14003.1 1 C/T missense 217435 G A CC 0 1 +879 -133 12/15 9/12 ? 0 396 0 GCC ACC A T 396 1 Q969V6 MKL1_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc010gye.1 ? ? ? C/T missense 217435 G A CC 0 1 +879 -133 12/15 9/12 ? 0 396 0 GCC ACC A T 396 1 E7ER32 E7ER32_HUMAN ? rs34736200 G/A 0.047299 0.14633 A>A>A +chr22:40815256 - MKL1 uc010gyf.1 ? ? ? C/T missense 217435 G A CC 0 1 +879 -133 11/14 8/11 ? 0 346 0 GCC ACC A T 346 1 B0QY83 B0QY83_HUMAN NP_065882 rs34736200 G/A 0.047299 0.14633 A>A>A +## Totals: +## lines input 4 +## lines skipped 0 +## alleles annotated 17 +## missense 17 +## nonsense 0 +## coding-synon 0 +## intron 0 +## utr-3 0 +## utr-5 0 diff -r 8aceb7fc57f5 -r b9de63c72559 chasm_webservice/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chasm_webservice/tool_dependencies.xml Mon Apr 14 19:16:11 2014 -0400 @@ -0,0 +1,6 @@ + + + + + + diff -r 8aceb7fc57f5 -r b9de63c72559 tools/chasm/README.rst --- a/tools/chasm/README.rst Fri Nov 01 06:01:52 2013 +0530 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -Galaxy wrapper for the CHASM webservice at CRAVAT(v2.0) -=================================================== - -This tool is copyright 2013 by Saket Choudhary, Indian Institute of Technology Bombay -All rights reserved. MIT licensed. - -Licence (MIT) -============= - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -Citations -=========== - - -If you use this Galaxy tool in work leading to a scientific publication please cite: - -Carter, Hannah, et al. "Cancer-specific high-throughput annotation of somatic mutations: computational prediction of driver missense mutations." -Cancer research 69.16 (2009): 6660-6667. - -Wong, Wing Chung, et al. "CHASM and SNVBox: toolkit for detecting biologically important single nucleotide mutations in cancer." -Bioinformatics 27.15 (2011): 2147-2148. diff -r 8aceb7fc57f5 -r b9de63c72559 tools/chasm/chasm_web.py --- a/tools/chasm/chasm_web.py Fri Nov 01 06:01:52 2013 +0530 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,222 +0,0 @@ -#!/usr/bin/python -""" -The MIT License (MIT) - -Copyright (c) 2013 Saket Choudhary, - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -""" -import requests -import argparse -import sys -import time -from functools import wraps -import simplejson as json -import zipfile -import tempfile, ntpath, shutil -import xlrd -import csv -import os -sheet_map = {0:"Variant_Analysis.csv",1:"Amino_Acid_Level_Analysis.csv",2:"Gene_Level_Analysis.csv"} -def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param ExceptionToCheck: the exception to check. may be a tuple of - exceptions to check - :type ExceptionToCheck: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay: initial delay between retries in seconds - :type delay: int - :param backoff: backoff multiplier e.g. value of 2 will double the delay - each retry - :type backoff: int - :param logger: logger to use. If None, print - :type logger: logging.Logger instance - """ - def deco_retry(f): - - @wraps(f) - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay - while mtries > 1: - try: - return f(*args, **kwargs) - except ExceptionToCheck, e: - #msg = "%s, Retrying in %d seconds..." % (str(e), mdelay) - msg = "Retrying in %d seconds..." % (mdelay) - if logger: - logger.warning(msg) - else: - print msg - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - return f(*args, **kwargs) - - return f_retry # true decorator - - return deco_retry -CANCERTYPES =["Bladder","Blood-Lymphocyte","Blood-Myeloid","Brain-Cerebellum","Brain-Glioblastoma_Multiforme","Brain-Lower_Grade_Glioma","Breast","Cervix","Colon","Head_and_Neck","Kidney-Chromophobe","Kidney-Clear_Cell","Kidney-Papiallary_Cell","Liver-Nonviral","Liver-Viral","Lung-Adenocarcinoma","Lung-Squamous_Cell","Melanoma","Other","Ovary","Pancreas","Prostate-Adenocarcinoma","Rectum","Skin","Stomach","Thyroid","Uterus"] -URL="http://www.cravat.us/ClassifierSelect1" -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -class CHASMWeb: - def __init__( self, mutationbox=None, filepath=None,is_hg_18=None, analysis_type=None, analysis_program=None, chosendb=None, cancer_type=None, email=None, annotate_genes=None, text_reports=None, mupit_out=None ): - self.mutationbox = mutationbox - self.filepath = filepath - self.is_hg_18 = is_hg_18 - self.analysis_type = analysis_type - self.analysis_program = analysis_program - self.chosendb = chosendb - self.email = email - self.annotate_genes = annotate_genes - self.cancer_type = cancer_type - self.email = email - self.annotate_genes = annotate_genes - self.text_reports=text_reports - self.mupit_input= mupit_out - def make_request( self ): - data = { - "mutationbox":self.mutationbox, - "hg18": self.is_hg_18, - "analysistype": self.analysis_type, - "chosendb": self.analysis_program, - "cancertype": self.cancer_type, - "geneannotcheckbox": self.annotate_genes, - "emailbox": self.email, - "tsvreport": self.text_reports, - "mupitinput": self.mupit_input, - } - stripped_data = {} - - for key,value in data.iteritems(): - if value==True: - value="on" - if value!=None and value!=False: - stripped_data[key]=value - #print stripped_data - if not self.mutationbox: - file_payload={"inputfile":open(self.filepath)} - request = requests.post(URL, data=stripped_data, files=file_payload) - else: - request = requests.post(URL, data=stripped_data, files=dict(foo='bar')) - job_id = json.loads(request.text)["jobId"] - return job_id - @retry(requests.exceptions.HTTPError) - def zip_exists(self,job_id ): - url="http://www.cravat.us/results/%s/%s.zip" %(job_id,job_id) - zip_download_request = requests.request("GET", url) - if zip_download_request.status_code==404: - raise requests.HTTPError() - else: - return url - def download_zip( self, url, job_id): - self.tmp_dir = tempfile.mkdtemp() - r = requests.get( url, stream=True ) - if r.status_code == 200: - self.path = os.path.join( self.tmp_dir,job_id+".zip" ) - with open(self.path, 'wb') as f: - for chunk in r.iter_content(128): - f.write(chunk) - else: - self.path = None - return self.path - - def move_files( self, file_map ): - fh = open(self.path,"rb") - zip_files = zipfile.ZipFile(fh) - for name in zip_files.namelist(): - filename = ntpath.basename(name) - extension = ntpath.splitext(filename)[-1] - source_file = zip_files.open(name) - if extension==".txt": - target_file = open(file_map["error.txt"],"wb") - elif filename!="SnvGet Feature Description.xls" and extension!=".xls": - target_file = open(file_map[filename],"wbb")#file(os.path.join(output_dir,filename),"wb") - else: - target_file=None - if target_file: - with source_file,target_file: - shutil.copyfileobj(source_file,target_file) - if filename=="SnvGet Feature Description.xls": - with xlrd.open_workbook(source_file) as wb: - sheet_names = wb.sheet_names() - for name in sheet_names: - sh=wb.sheet_by_name(name) - name_shortened= name.replace(" ").strip()+".csv" - with open(name_shortened,'wb') as f: - c = csv.writer(f) - for r in range(sh.nrows): - c.writerow(sh.row_values(r)) - shutil.rmtree(self.tmp_dir) - fh.close() -def main(params): - - parser = argparse.ArgumentParser() - parser.add_argument("-i","--input", type=str, dest="mutationbox", help="Input variants") - parser.add_argument("--path",type=str,dest="input_file_location", help="Input file location") - parser.add_argument("--hg18", dest="hg18", action="store_true") - parser.add_argument("--analysis_type",dest="analysis_type",type=str,choices=["driver","functional","geneannotationonly"], default="driver") - parser.add_argument("--chosendb",dest="chosendb",type=str,nargs="*",choices=["CHASM","SnvGet"], default="CHASM") - parser.add_argument("--cancertype", dest="cancer_type", type=str, choices=CANCERTYPES, required=True) - parser.add_argument("--email", dest="email", required=True, type=str) - parser.add_argument("--annotate", dest="annotate", action="store_true", default=None ) - parser.add_argument("--tsv_report", dest="tsv_report", action="store_true", default=None ) - parser.add_argument("--mupit_out", dest="mupit_out", action="store_true", default=None ) - parser.add_argument("--gene_analysis_out", dest="gene_analysis_out", type=str, required=True) - parser.add_argument("--variant_analysis_out", dest="variant_analysis_out", type=str, required=True) - parser.add_argument("--amino_acid_level_analysis_out", dest="amino_acid_level_analysis_out", type=str, required=True,) - parser.add_argument("--error_file", dest="error_file_out", type=str, required=True) - parser.add_argument("--snv_box_out", dest="snv_box_out", type=str, required=False) - parser.add_argument("--snv_features", dest="snv_features_out", type=str, required=False) - args = (parser.parse_args(params)) - chasm_web = CHASMWeb(mutationbox=args.mutationbox, - filepath=args.input_file_location, - is_hg_18 = args.hg18, - analysis_type=args.analysis_type, - chosendb = args.chosendb, - cancer_type = args.cancer_type, - email=args.email, - annotate_genes=args.annotate, - text_reports=args.tsv_report, - mupit_out=args.mupit_out) - job_id=chasm_web.make_request() - file_map = {"Amino_Acid_Level_Analysis.tsv":args.amino_acid_level_analysis_out, - "SNVBox.tsv":args.snv_box_out, - "Variant_Analysis.tsv":args.variant_analysis_out, - "Gene_Level_Analysis.tsv":args.gene_analysis_out, - "SnvGet Feature Description.xls":args.snv_features_out, - "error.txt":args.error_file_out - } - url = chasm_web.zip_exists(job_id) - download = chasm_web.download_zip(url,job_id) - if download: - move = chasm_web.move_files(file_map=file_map) - else: - stop_err("Unable to download from the server") - -if __name__=="__main__": - main(sys.argv[1:]) diff -r 8aceb7fc57f5 -r b9de63c72559 tools/chasm/chasm_web.xml --- a/tools/chasm/chasm_web.xml Fri Nov 01 06:01:52 2013 +0530 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ - diff -r 8aceb7fc57f5 -r b9de63c72559 tools/chasm/tool_dependencies.xml --- a/tools/chasm/tool_dependencies.xml Fri Nov 01 06:01:52 2013 +0530 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ - - - - - - -