changeset 0:b319f980c9e6 draft

Uploaded
author saketkc
date Mon, 14 Apr 2014 17:27:06 -0400
parents
children 1cbb0f884e7c
files polyphen2_web/README.rst polyphen2_web/polyphen2_web.py polyphen2_web/polyphen2_web.xml polyphen2_web/test-data/chasm_input.txt polyphen2_web/test-data/ma_empty.csv polyphen2_web/test-data/ma_nucleotide_output.csv polyphen2_web/test-data/ma_proper_nucleotide.csv polyphen2_web/test-data/ma_proper_protein.csv polyphen2_web/test-data/ma_proper_protein.tsv polyphen2_web/test-data/ma_protein_output.csv polyphen2_web/test-data/polyphen2_full.txt polyphen2_web/test-data/polyphen2_input.txt polyphen2_web/test-data/polyphen2_log.txt polyphen2_web/test-data/polyphen2_short.txt polyphen2_web/test-data/polyphen2_snp.txt
diffstat 14 files changed, 495 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/README.rst	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,35 @@
+Galaxy wrapper for the Polyphen2 webservice 
+===================================================
+
+This tool is copyright 2014 by Saket Choudhary <saketkc@gmail.com>, Indian Institute of Technology Bombay.
+All rights reserved. MIT licensed.
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Citations
+===========
+
+
+If you use this Galaxy tool in work leading to a scientific publication please cite:
+
+Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. Nat Methods 7(4):248-249 (2010).
+"A method and server for predicting damaging missense mutations."
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/polyphen2_web.py	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,208 @@
+#!/usr/bin/python
+from bs4 import BeautifulSoup
+import argparse
+import sys
+import time
+import os
+import tempfile
+import requests
+import shutil
+import csv
+# Endpoint that make_request() and poll_for_files() POST their form data to.
+submission_url = 'http://genetics.bwh.harvard.edu/cgi-bin/ggi/ggi2.cgi'
+# Host prefix prepended to the result paths scraped from the status page.
+result_url = 'http://genetics.bwh.harvard.edu'
+
+# NOTE(review): not referenced anywhere in the visible code -- confirm
+# before relying on or removing it.
+refresh_interval = 30
+# Default for poll_for_files(timeout=...); that method never reads the value.
+TIMEOUT = 60 * 60
+# Default number of seconds poll_for_files() sleeps between status requests.
+TIME_DELAY = 7
+# Default number of status requests poll_for_files() makes before giving up.
+MAX_TRIES = 30
+
+# Genome assembly version used for chromosome
+# coordinates of the SNPs in user input
+# (accepted values of the --ucscdb option)
+UCSCDB = ['hg19', 'hg18']
+# Classifier model used for predictions.
+# (accepted values of the --model option)
+MODELNAME = ['HumDiv', 'HumVar']
+
+# Set of transcripts on which genomic SNPs will be mapped
+# (keys are the accepted --filter values, values are what gets sent as the
+# SNPFILTER form field)
+SNPFILTER = {
+    'All': 0,
+    'Canonical': 1,
+    'CCDS': 3,
+}
+# Functional SNP categories to include in genomic SNPs annotation report
+# ('' is what make_request() sends when 'All' is requested)
+# NOTE(review): this list is not referenced by the visible code.
+SNPFUNCTION = ['c', 'm', '']
+
+
+def stop_err(msg, err=1):
+    sys.stderr.write('%s\n' % msg)
+    sys.exit(err)
+
+
+class Polyphen2Web:
+
+    def __init__(self, ucscdb=None, model_name=None, snp_filter=None,
+                 snp_function=None, file_location=None, email=None):
+        self.ucscdb = ucscdb
+        self.model_name = model_name
+        self.snp_filter = snp_filter
+        self.snp_function = snp_function
+        self.file_location = file_location
+        self.notify_me = email
+
+    def soupify(self, string):
+        return BeautifulSoup(string)
+
+    def make_request(self):
+        in_txt = csv.reader(open(self.file_location, 'rb'), delimiter='\t')
+        tmp_dir = tempfile.mkdtemp()
+        path = os.path.join(tmp_dir, 'csv_file')
+        with open(path, 'wb') as fh:
+            a = csv.writer(fh)
+            a.writerows(in_txt)
+        contents = open(self.file_location, 'r').read().replace(
+            '\t', ' ').replace('::::::::::::::', '')
+        if self.snp_function == 'All':
+            self.snp_function = ''
+        payload = {
+            '_ggi_project': 'PPHWeb2',
+            '_ggi_origin': 'query',
+            '_ggi_batch': contents,
+            '_ggi_target_pipeline': '1',
+            'MODELNAME': self.model_name,
+            'UCSCDB': self.ucscdb,
+            'SNPFILTER': SNPFILTER[self.snp_filter],
+            'SNPFUNC': self.snp_function,
+            'NOTIFYME': '',
+
+        }
+        if self.notify_me:
+            payload['NOTIFYME'] = self.notify_me
+        request = requests.post(submission_url, data=payload)
+        content = request.content
+        soup = self.soupify(content)
+        sid_soup = soup.find('input', {'name': 'sid'})
+        try:
+            sid = sid_soup['value']
+        except:
+            sid = None
+        shutil.rmtree(tmp_dir)
+        return sid
+
+    def poll_for_files(self, sid,
+                       max_tries=MAX_TRIES,
+                       time_delay=TIME_DELAY,
+                       timeout=TIMEOUT):
+        payload = {
+            '_ggi_project': 'PPHWeb2',
+            '_ggi_origin': 'manage',
+            '_ggi_target_manage': 'Refresh',
+            'sid': sid,
+        }
+        content = None
+        tries = 0
+        url_dict = None
+        while True:
+            tries += 1
+            if tries > max_tries:
+                stop_err('Number of tries exceeded!')
+            request = requests.post(submission_url, data=payload)
+            content = request.content
+            soup = self.soupify(content)
+            all_tables = soup.findAll('table')
+            if all_tables:
+                try:
+                    running_jobs_table = all_tables[-2]
+                except:
+                    running_jobs_table = None
+                if running_jobs_table:
+                    rows = running_jobs_table.findAll('tr')
+                    if len(rows) == 1:
+                        row = rows[0]
+                        hrefs = row.findAll('a')
+                        # print hrefs
+                        if len(hrefs) >= 3:
+                            short_txt = hrefs[0]['href']
+                            # print short_txt
+                            path = short_txt.split('-')[0]
+                            full_txt = result_url + path + '-full.txt'
+                            log_txt = result_url + path + '-log.txt'
+                            snps_txt = result_url + path + '-snps.txt'
+                            short_txt = result_url + path + \
+                                '-short.txt'  # short_txt
+                            url_dict = {
+                                'full_file': full_txt,
+                                'snps_file': snps_txt,
+                                'log_file': log_txt,
+                                'short_file': short_txt,
+                            }
+                            return url_dict
+            time.sleep(time_delay)
+        return url_dict
+
+    def save_to_files(self, url_dict, args):
+        tmp_dir = tempfile.mkdtemp()
+        for key, value in url_dict.iteritems():
+            r = requests.get(value, stream=True)
+            if r.status_code == 200:
+                path = os.path.join(tmp_dir, key)
+                with open(path, 'wb') as f:
+                    for chunk in r.iter_content(128):
+                        f.write(chunk)
+                shutil.move(path, args[key])
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+        return True
+
+
+def main(args):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-u',
+                        '--ucscdb',
+                        dest='ucscdb',
+                        choices=UCSCDB,
+                        required=True, type=str)
+    parser.add_argument('-m', '--model',
+                        dest='modelname', choices=MODELNAME,
+                        required=True, type=str)
+    parser.add_argument('-fl', '--filter',
+                        '--snpfilter', dest='snpfilter',
+                        choices=SNPFILTER.keys(),
+                        required=True, type=str)
+    parser.add_argument('-i', '--input',
+                        dest='input', nargs='?',
+                        required=True, type=str,
+                        default=sys.stdin)
+    parser.add_argument('-e', '--email',
+                        dest='email',
+                        required=False, default=None)
+    parser.add_argument('--log', dest='log_file',
+                        required=True, default=None, type=str)
+    parser.add_argument('--short', dest='short_file',
+                        required=True, default=None, type=str)
+    parser.add_argument('--full', dest='full_file',
+                        required=True, default=None, type=str)
+    parser.add_argument('--snp', dest='snps_file',
+                        required=True, default=None, type=str)
+    parser.add_argument('--function', dest='snpfunction',
+                        required=True, type=str)
+    args_s = vars(parser.parse_args(args))
+    polyphen2_web = Polyphen2Web(ucscdb=args_s['ucscdb'],
+                                 model_name=args_s['modelname'],
+                                 snp_filter=args_s['snpfilter'],
+                                 snp_function=args_s['snpfunction'],
+                                 file_location=args_s['input'],
+                                 email=args_s['email'])
+    sid = polyphen2_web.make_request()
+    if not sid:
+        stop_err(
+            'Something went wrong! The tracking id could not be retrieved.')
+    url_dict = polyphen2_web.poll_for_files(sid)
+    locations = {}
+    if not url_dict:
+        stop_err('There was error downloading the output files!')
+    for key in url_dict.keys():
+        locations[key] = args_s[key]
+    polyphen2_web.save_to_files(url_dict, locations)
+    return True
+
+# Script entry point: pass the command-line arguments (without the program
+# name) to main().
+if __name__ == '__main__':
+    main(sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/polyphen2_web.xml	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,104 @@
+<tool id="polyphen2_web" name="PolyPhen-2 Webservice">
+    <description>Compute functional impact of SNVs </description>
+    <requirements>
+        <requirement type="package" version="4.1.0">beautifulsoup4</requirement>
+        <requirement type="python-module">bs4</requirement>
+    </requirements>
+    <command interpreter="python">
+        polyphen2_web.py --ucscdb $ucscdb
+                         --model $model
+                         --filter $filter
+                         --function $function
+                         --input $input
+                         --log $log_file
+                         --full $full_file
+                         --short $short_file
+                         --snp $snp_file
+    </command>
+    <inputs>
+        <param format="txt" name="input" type="data" label="Variants File" />
+        <param name="ucscdb" type="select" label="Genome Assembly">
+            <option value="hg19">GRCh37/hg19</option>
+            <option value="hg18">NCBI36/hg18</option>
+        </param>
+        <param name="model" type="select" label="Classifier Model">
+            <option value="HumDiv">HumDiv</option>
+            <option value="HumVar">HumVar</option>
+        </param>
+        <param name="filter" type="select" label="Transcripts">
+            <option value="All">All</option>
+            <option value="Canonical">Canonical</option>
+            <option value="CCDS">CCDS</option>
+        </param>
+        <param name="function" type="select" label="Annotations">
+            <option value="c">Coding</option>
+            <option value="m">Missense</option>
+            <option value="All">All</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="log_file" label="${tool.name} on ${on_string}: log" />
+        <data format="tabular" name="full_file" label="${tool.name} on ${on_string}: full"/>
+        <data format="tabular" name="short_file" label="${tool.name} on ${on_string}: short"/>
+        <data format="tabular" name="snp_file" label="${tool.name} on ${on_string}: snp"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input" value="polyphen2_input.txt"/>
+            <param name="ucscdb" value="hg19"/>
+            <param name="model" value="HumDiv"/>
+            <param name="filter" value="All"/>
+            <param name="function" value="All"/>
+            <output name="log_file" file="polyphen2_log.txt"/>
+            <output name="full_file" file="polyphen2_full.txt"/>
+            <output name="short_file" file="polyphen2_short.txt"/>
+            <output name="snp_file" file="polyphen2_snp.txt"/>
+        </test>
+    </tests>
+    <help>
+        **What it does**
+            This tool interacts with the Web Version of Polyphen2 hosted at  http://genetics.bwh.harvard.edu/pph2/
+
+            PolyPhen-2 (Polymorphism Phenotyping v2) is a software tool which predicts possible impact of amino acid substitutions
+            on the structure and function of human proteins using straightforward physical and evolutionary comparative considerations.
+
+            .. class:: infomark
+
+            * HumDiv model uses 5% / 10% FPR thresholds for “probably damaging” / “possibly damaging” predictions
+
+            .. class:: infomark
+
+            * HumVar model uses 10% / 20% FPR thresholds for “probably damaging” / “possibly damaging” predictions
+
+            .. class:: infomark
+
+
+            **Input format**:
+
+
+            chr22:30421786 A/T
+
+            chr22:29446079 A/G
+
+            chr22:40814500 A/G
+
+            chr22:40815256 C/T
+
+
+            **Citations**
+
+                If you use this tool please cite:
+
+                Adzhubei IA, Schmidt S, Peshkin L, Ramensky VE, Gerasimova A, Bork P, Kondrashov AS, Sunyaev SR. Nat Methods 7(4):248-249 (2010).
+                "A method and server for predicting damaging missense mutations."
+
+    </help>
+</tool>
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/chasm_input.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,5 @@
+TR1 chr22 30421786 + A T
+TR2 chr22 29446079 + A G
+TR3 chr22 29446079 + A G
+TR4 chr22 40814500 - A G
+TR5 chr22 40815256 + C T
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/ma_nucleotide_output.csv	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,4 @@
+"","Mutation","AA variant","Gene","MSA","PDB","Func. Impact","FI score","Uniprot","Refseq","MSA height","Codon start position","Func. region","Protein bind.site","DNA/RNA bind.site","small.mol bind.site"
+"1","hg19,13,32912555,G,T","D1355Y","BRCA2","http://getma.org/?cm=msa&ty=f&p=BRCA2_HUMAN&rb=1247&re=1420&var=D1355Y","","low","1.24","BRCA2_HUMAN","NP_000050","14","chr13:32912555","","","",""
+"2","hg19,7,55178574,G,A","","","","","","","","","0","","","","",""
+"3","hg19,7,55178574,G,A","","","","","","","","","0","","","","",""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/ma_proper_nucleotide.csv	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,3 @@
+13,32912555,G,T   BRCA2
+7,55178574,G,A   GBM
+7,55178574,G,A   GBM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/ma_proper_protein.csv	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,6 @@
+EGFR_HUMAN,R521K
+EGFR_HUMAN,R98Q,Polymorphism
+EGFR_HUMAN,G719D,disease
+NP_000537,G356A
+NP_000537,G360A,dbSNP:rs35993958
+NP_000537,S46A,Abolishes,phosphorylation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/ma_proper_protein.tsv	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,6 @@
+EGFR_HUMAN R521K 
+EGFR_HUMAN R98Q Polymorphism 
+EGFR_HUMAN G719D disease 
+NP_000537 G356A 
+NP_000537 G360A dbSNP:rs35993958 
+NP_000537 S46A Abolishes phosphorylation 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/ma_protein_output.csv	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,7 @@
+"","Mutation","AA variant","Gene","MSA","PDB","Func. Impact","FI score","Uniprot","Refseq","MSA height","Codon start position","Func. region","Protein bind.site","DNA/RNA bind.site","small.mol bind.site"
+"1","EGFR_HUMAN R521K","R521K","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=482&re=681&var=R521K","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=482&to=681&var=R521K","neutral","0.405","EGFR_HUMAN","NP_005219","399","chr7:55196748","1","1","",""
+"2","EGFR_HUMAN R98Q","R98Q","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=57&re=168&var=R98Q","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=57&to=168&var=R98Q","neutral","0.6","EGFR_HUMAN","NP_005219","181","chr7:55178543","","","",""
+"3","EGFR_HUMAN G719D","G719D","EGFR","http://getma.org/?cm=msa&ty=f&p=EGFR_HUMAN&rb=712&re=968&var=G719D","http://getma.org/pdb.php?prot=EGFR_HUMAN&from=712&to=968&var=G719D","medium","3.115","EGFR_HUMAN","NP_005219","700","chr7:55209201","1","1","","0UN IRE FMM ANP CY7 HYZ 03P ITI DKI 685 T95 T74 ZZY M97 6XP 0K0 KRW 0JJ 0K1 P17 112 1N1 JIN STI P5C 585 S19 P16 VX6 P3Y SX7 ACK B90 AMP ZD6 STU 7PY BI9 BII ATP ADP 4ST VG8 YAM P1E 7X4 7X5 7X6 7X8 349 3JZ"
+"4","NP_000537 G356A","G356A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=318&re=359&var=G356A","http://getma.org/pdb.php?prot=P53_HUMAN&from=318&to=359&var=G356A","neutral","-0.895","P53_HUMAN","NP_000537","45","chr17:7514684","","1","",""
+"5","NP_000537 G360A","G360A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=360&re=393&var=G360A","","medium","2.25","P53_HUMAN","NP_000537","15","chr17:7514672","","","",""
+"6","NP_000537 S46A","S46A","TP53","http://getma.org/?cm=msa&ty=f&p=P53_HUMAN&rb=30&re=94&var=S46A","http://getma.org/pdb.php?prot=P53_HUMAN&from=30&to=94&var=S46A","neutral","0.55","P53_HUMAN","NP_000537","5","chr17:7520274","1","1","",""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/polyphen2_full.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,23 @@
+#o_acc               	 o_pos	o_aa1	o_aa2	rsid      	acc       	   pos	aa1	aa2	nt1	nt2	        prediction	            based_on	    effect	        pph2_class	 pph2_prob	  pph2_FPR	  pph2_TPR	  pph2_FDR	    site	  region	    PHAT	dScore	Score1	Score2	MSAv	  Nobs	 Nstruct	 Nfilt	PDB_id	PDB_pos	PDB_ch	 ident	length	NormASA	SecStr	MapReg	  dVol	 dProp	B-fact	 H-bonds	 AveNHet	 MinDHet	 AveNInt	 MinDInt	 AveNSit	 MinDSit	Transv	CodPos	CpG	 MinDJxn	     PfamHit	  IdPmax	  IdPSNP	  IdQmin
+Q13615-2            	  1170	    N	    I	         ?	Q13615-2  	  1170	  N	  I	  A	  T	 probably damaging	           alignment	         ?	       deleterious	     0.998	    0.0112	     0.273	    0.0274	       ?	       ?	       ?	+2.214	-1.705	-3.919	   2	    37	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     1	  0	   -2313	           ?	   1.268	       ?	   47.09	# chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690
+Q13615              	  1198	    N	    I	         ?	Q13615    	  1198	  N	  I	  A	  T	 probably damaging	           alignment	         ?	       deleterious	     0.998	    0.0112	     0.273	    0.0274	      NO	      NO	       ?	+2.296	-1.580	-3.876	   2	    38	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     1	  0	   -3099	          NO	   1.010	       ?	   45.58	# chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576
+Q13615-3            	  1161	    N	    I	         ?	Q13615-3  	  1161	  N	  I	  A	  T	 probably damaging	           alignment	         ?	       deleterious	     0.998	    0.0112	     0.273	    0.0274	       ?	       ?	       ?	+2.214	-1.705	-3.919	   2	    37	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     1	  0	   -3099	           ?	   1.275	       ?	   47.37	# chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691
+Q9ULT6              	   637	    H	    R	         ?	Q9ULT6    	   637	  H	  R	  A	  G	            benign	           alignment	         ?	           neutral	     0.002	     0.704	     0.987	     0.452	      NO	      NO	       ?	+0.398	-2.258	-2.656	   2	    47	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     1	  2	    +858	          NO	  20.363	  20.363	   77.46	# chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549
+Q9ULT6              	   637	    H	    R	         ?	Q9ULT6    	   637	  H	  R	  A	  G	            benign	           alignment	         ?	           neutral	     0.002	     0.704	     0.987	     0.452	      NO	      NO	       ?	+0.398	-2.258	-2.656	   2	    47	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     1	  2	   -1599	          NO	  20.363	  20.363	   77.46	# chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549
+Q969V6              	   648	    S	    C	         ?	Q969V6    	   648	  S	  C	  A	  T	 possibly damaging	           alignment	         ?	       deleterious	      0.89	    0.0639	     0.821	    0.0953	      NO	COMPBIAS	       ?	+2.837	-1.909	-4.746	   2	    32	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  0	    +123	          NO	   1.320	       ?	   90.33	# chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    R	         ?	Q969V6    	   648	  S	  R	  A	  C	            benign	           alignment	         ?	           neutral	     0.167	     0.131	      0.92	     0.162	      NO	COMPBIAS	       ?	+1.814	-1.909	-3.723	   2	    32	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  2	    +123	          NO	   2.525	       ?	   90.33	# chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    C	         ?	Q969V6    	   648	  S	  C	  A	  T	 possibly damaging	           alignment	         ?	       deleterious	      0.89	    0.0639	     0.821	    0.0953	      NO	COMPBIAS	       ?	+2.837	-1.909	-4.746	   2	    32	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  0	    +123	          NO	   1.320	       ?	   90.33	# chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    R	         ?	Q969V6    	   648	  S	  R	  A	  C	            benign	           alignment	         ?	           neutral	     0.167	     0.131	      0.92	     0.162	      NO	COMPBIAS	       ?	+1.814	-1.909	-3.723	   2	    32	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  2	    +123	          NO	   2.525	       ?	   90.33	# chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882
+E7ER32              	   648	    S	    C	         ?	E7ER32    	   648	  S	  C	  A	  T	 possibly damaging	           alignment	         ?	       deleterious	     0.953	    0.0514	     0.788	    0.0812	      NO	      NO	       ?	+2.837	-1.909	-4.746	   2	    33	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  0	    +123	          NO	   1.255	       ?	   87.22	# chr22:40814500|TA|uc010gye.1-|MKL1|
+E7ER32              	   648	    S	    R	         ?	E7ER32    	   648	  S	  R	  A	  C	            benign	           alignment	         ?	           neutral	     0.337	     0.111	     0.901	     0.142	      NO	      NO	       ?	+1.814	-1.909	-3.723	   2	    33	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  2	    +123	          NO	   2.402	       ?	   87.22	# chr22:40814500|TG|uc010gye.1-|MKL1|
+B0QY83              	   598	    S	    C	         ?	B0QY83    	   598	  S	  C	  A	  T	 possibly damaging	        alignment_mz	         ?	       deleterious	     0.726	    0.0797	     0.856	     0.112	      NO	      NO	       ?	+2.847	-1.931	-4.778	   3	    31	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  0	    +123	          NO	   1.615	       ?	   91.49	# chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882
+B0QY83              	   598	    S	    R	         ?	B0QY83    	   598	  S	  R	  A	  C	            benign	        alignment_mz	         ?	           neutral	     0.047	     0.168	     0.942	     0.195	      NO	      NO	       ?	+1.674	-1.931	-3.605	   3	    31	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     1	     0	  2	    +123	          NO	   5.560	       ?	   91.49	# chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882
+Q969V6              	   396	    A	    T	         ?	Q969V6    	   396	  A	  T	  G	  A	            benign	           alignment	         ?	           neutral	     0.009	     0.233	     0.961	     0.247	      NO	      NO	       ?	+0.097	-1.540	-1.637	   2	    39	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     0	  1	    +879	          NO	  21.659	  21.659	   88.08	# chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   396	    A	    T	         ?	Q969V6    	   396	  A	  T	  G	  A	            benign	           alignment	         ?	           neutral	     0.009	     0.233	     0.961	     0.247	      NO	      NO	       ?	+0.097	-1.540	-1.637	   2	    39	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     0	  1	    +879	          NO	  21.659	  21.659	   88.08	# chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882
+E7ER32              	   396	    A	    T	         ?	E7ER32    	   396	  A	  T	  G	  A	            benign	           alignment	         ?	           neutral	     0.009	     0.233	     0.961	     0.247	      NO	      NO	       ?	+0.097	-1.540	-1.637	   2	    39	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     0	  1	    +879	          NO	  20.554	  20.554	   83.58	# chr22:40815256|CT|uc010gye.1-|MKL1|
+B0QY83              	   346	    A	    T	         ?	B0QY83    	   346	  A	  T	  G	  A	            benign	        alignment_mz	         ?	           neutral	     0.008	     0.239	     0.963	     0.252	      NO	      NO	       ?	+0.456	-1.547	-2.003	   3	    32	       ?	     ?	     ?	      ?	     ?	     ?	     ?	      ?	     ?	     ?	     ?	     ?	     ?	       ?	       ?	       ?	       ?	       ?	       ?	       ?	     0	     0	  1	    +879	          NO	  21.940	  21.940	   89.22	# chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882
+## Sources:
+##   Predictions: PolyPhen-2 v2.2.2r398
+##   Sequences:   UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011)
+##   Structures:  PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures)
+##   Genes:       UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/polyphen2_input.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,5 @@
+chr22:30421786 A/T
+chr22:29446079 A/G
+chr22:40814500 A/G
+chr22:40815256 C/T
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/polyphen2_log.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,38 @@
+===========================
+Stage 1/7: Validating input
+===========================
+No errors
+
+===============================
+Stage 2/7: Mapping genomic SNPs
+===============================
+WARNING: (chr22:40814500 - uc003ayv.1) None of the input alleles (A/G) matches reference allele (T)
+WARNING: (chr22:40814500 - uc003ayw.1) None of the input alleles (A/G) matches reference allele (T)
+WARNING: (chr22:40814500 - uc010gye.1) None of the input alleles (A/G) matches reference allele (T)
+WARNING: (chr22:40814500 - uc010gyf.1) None of the input alleles (A/G) matches reference allele (T)
+Total errors/warnings: 4
+
+============================
+Stage 3/7: Collecting output
+============================
+No errors
+
+===============================================
+Stage 4/7: Building MSA and annotating proteins
+===============================================
+No errors
+
+============================
+Stage 5/7: Collecting output
+============================
+No errors
+
+=====================
+Stage 6/7: Predicting
+=====================
+No errors
+
+=============================
+Stage 7/7: Generating reports
+=============================
+No errors
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/polyphen2_short.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,23 @@
+#o_acc               	 o_pos	o_aa1	o_aa2	rsid      	acc       	   pos	aa1	aa2	        prediction	 pph2_prob	  pph2_FPR	  pph2_TPR
+Q13615-2            	  1170	    N	    I	         ?	Q13615-2  	  1170	  N	  I	 probably damaging	     0.998	    0.0112	     0.273	# chr22:30421786|AT|uc003agu.3+|MTMR3|NP_694690
+Q13615              	  1198	    N	    I	         ?	Q13615    	  1198	  N	  I	 probably damaging	     0.998	    0.0112	     0.273	# chr22:30421786|AT|uc003agv.3+|MTMR3|NP_066576
+Q13615-3            	  1161	    N	    I	         ?	Q13615-3  	  1161	  N	  I	 probably damaging	     0.998	    0.0112	     0.273	# chr22:30421786|AT|uc003agw.3+|MTMR3|NP_694691
+Q9ULT6              	   637	    H	    R	         ?	Q9ULT6    	   637	  H	  R	            benign	     0.002	     0.704	     0.987	# chr22:29446079|AG|uc003aeg.2+|ZNRF3|NP_115549
+Q9ULT6              	   637	    H	    R	         ?	Q9ULT6    	   637	  H	  R	            benign	     0.002	     0.704	     0.987	# chr22:29446079|AG|uc003aeh.1+|ZNRF3|NP_115549
+Q969V6              	   648	    S	    C	         ?	Q969V6    	   648	  S	  C	 possibly damaging	      0.89	    0.0639	     0.821	# chr22:40814500|TA|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    R	         ?	Q969V6    	   648	  S	  R	            benign	     0.167	     0.131	      0.92	# chr22:40814500|TG|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    C	         ?	Q969V6    	   648	  S	  C	 possibly damaging	      0.89	    0.0639	     0.821	# chr22:40814500|TA|uc003ayw.1-|MKL1|NP_065882
+Q969V6              	   648	    S	    R	         ?	Q969V6    	   648	  S	  R	            benign	     0.167	     0.131	      0.92	# chr22:40814500|TG|uc003ayw.1-|MKL1|NP_065882
+E7ER32              	   648	    S	    C	         ?	E7ER32    	   648	  S	  C	 possibly damaging	     0.953	    0.0514	     0.788	# chr22:40814500|TA|uc010gye.1-|MKL1|
+E7ER32              	   648	    S	    R	         ?	E7ER32    	   648	  S	  R	            benign	     0.337	     0.111	     0.901	# chr22:40814500|TG|uc010gye.1-|MKL1|
+B0QY83              	   598	    S	    C	         ?	B0QY83    	   598	  S	  C	 possibly damaging	     0.726	    0.0797	     0.856	# chr22:40814500|TA|uc010gyf.1-|MKL1|NP_065882
+B0QY83              	   598	    S	    R	         ?	B0QY83    	   598	  S	  R	            benign	     0.047	     0.168	     0.942	# chr22:40814500|TG|uc010gyf.1-|MKL1|NP_065882
+Q969V6              	   396	    A	    T	         ?	Q969V6    	   396	  A	  T	            benign	     0.009	     0.233	     0.961	# chr22:40815256|CT|uc003ayv.1-|MKL1|NP_065882
+Q969V6              	   396	    A	    T	         ?	Q969V6    	   396	  A	  T	            benign	     0.009	     0.233	     0.961	# chr22:40815256|CT|uc003ayw.1-|MKL1|NP_065882
+E7ER32              	   396	    A	    T	         ?	E7ER32    	   396	  A	  T	            benign	     0.009	     0.233	     0.961	# chr22:40815256|CT|uc010gye.1-|MKL1|
+B0QY83              	   346	    A	    T	         ?	B0QY83    	   346	  A	  T	            benign	     0.008	     0.239	     0.963	# chr22:40815256|CT|uc010gyf.1-|MKL1|NP_065882
+## Sources:
+##   Predictions: PolyPhen-2 v2.2.2r398
+##   Sequences:   UniProtKB/UniRef100 Release 2011_12 (14-Dec-2011)
+##   Structures:  PDB/DSSP Snapshot 03-Jan-2012 (78304 Structures)
+##   Genes:       UCSC MultiZ46Way GRCh37/hg19 (08-Oct-2009)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/polyphen2_web/test-data/polyphen2_snp.txt	Mon Apr 14 17:27:06 2014 -0400
@@ -0,0 +1,28 @@
+#snp_pos         	str	        gene	  transcript	 ccid	        ccds	cciden	refa	        type	   ntpos	nt1	nt2	flanks	trv	cpg	  jxdon	  jxacc	   exon	  cexon	jxc	dgn	  cdnpos	frame	cdn1	cdn2	aa1	aa2	   aapos	spmap	     spacc	      spname	    refs_acc	    dbrsid	dbobsrvd	   dbavHet	 dbavHetSE	dbRmPaPt	comments
+chr22:30421786  	  +	       MTMR3	  uc003agu.3	    ?	 CCDS13871.1	     1	 A/T	    missense	  142629	  A	  T	    AC	  1	  0	  -2313	   -168	  20/20	  18/18	  ?	  0	    1170	    1	 AAC	 ATC	  N	  I	    1170	    1	  Q13615-2	 MTMR3_HUMAN	   NP_694690	rs75623810	     A/T	  0.016564	  0.089485	   A>A>A
+chr22:30421786  	  +	       MTMR3	  uc003agv.3	16552	 CCDS13870.1	     1	 A/T	    missense	  142629	  A	  T	    AC	  1	  0	  -3099	   -168	  20/20	  18/18	  ?	  0	    1198	    1	 AAC	 ATC	  N	  I	    1198	    1	    Q13615	 MTMR3_HUMAN	   NP_066576	rs75623810	     A/T	  0.016564	  0.089485	   A>A>A
+chr22:30421786  	  +	       MTMR3	  uc003agw.3	    ?	 CCDS46682.1	     1	 A/T	    missense	  142629	  A	  T	    AC	  1	  0	  -3099	   -168	  19/19	  17/17	  ?	  0	    1161	    1	 AAC	 ATC	  N	  I	    1161	    1	  Q13615-3	 MTMR3_HUMAN	   NP_694691	rs75623810	     A/T	  0.016564	  0.089485	   A>A>A
+chr22:29446079  	  +	       ZNRF3	  uc003aeg.2	16531	 CCDS42999.1	     1	 A/G	    missense	  166190	  A	  G	    CC	  0	  2	   +858	   -895	    8/9	    7/8	  ?	  0	     537	    1	 CAC	 CGC	  H	  R	     637	    1	    Q9ULT6	 ZNRF3_HUMAN	   NP_115549	rs62641746	     A/G	  0.030762	  0.120144	   A>A>A
+chr22:29446079  	  +	       ZNRF3	  uc003aeh.1	    ?	 CCDS42999.1	 0.982	 A/G	    missense	   63040	  A	  G	    CC	  0	  2	  -1599	   -895	    7/7	    7/7	  ?	  0	     537	    1	 CAC	 CGC	  H	  R	     637	    1	    Q9ULT6	 ZNRF3_HUMAN	   NP_115549	rs62641746	     A/G	  0.030762	  0.120144	   A>A>A
+chr22:40814500  	  -	        MKL1	  uc003ayv.1	    ?	 CCDS14003.1	     1	 T/A	    missense	   44939	  A	  T	    CG	  1	  0	   +123	   -889	   9/12	   9/12	  ?	  0	     648	    0	 AGC	 TGC	  S	  C	     648	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc003ayv.1	    ?	 CCDS14003.1	     1	 T/G	    missense	   44939	  A	  C	    CG	  1	  2	   +123	   -889	   9/12	   9/12	  ?	  0	     648	    0	 AGC	 CGC	  S	  R	     648	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc003ayw.1	16752	 CCDS14003.1	     1	 T/A	    missense	  218191	  A	  T	    CG	  1	  0	   +123	   -889	  12/15	   9/12	  ?	  0	     648	    0	 AGC	 TGC	  S	  C	     648	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc003ayw.1	16752	 CCDS14003.1	     1	 T/G	    missense	  218191	  A	  C	    CG	  1	  2	   +123	   -889	  12/15	   9/12	  ?	  0	     648	    0	 AGC	 CGC	  S	  R	     648	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc010gye.1	    ?	           ?	     ?	 T/A	    missense	  218191	  A	  T	    CG	  1	  0	   +123	   -889	  12/15	   9/12	  ?	  0	     648	    0	 AGC	 TGC	  S	  C	     648	    1	    E7ER32	E7ER32_HUMAN	           ?	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc010gye.1	    ?	           ?	     ?	 T/G	    missense	  218191	  A	  C	    CG	  1	  2	   +123	   -889	  12/15	   9/12	  ?	  0	     648	    0	 AGC	 CGC	  S	  R	     648	    1	    E7ER32	E7ER32_HUMAN	           ?	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc010gyf.1	    ?	           ?	     ?	 T/A	    missense	  218191	  A	  T	    CG	  1	  0	   +123	   -889	  11/14	   8/11	  ?	  0	     598	    0	 AGC	 TGC	  S	  C	     598	    1	    B0QY83	B0QY83_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40814500  	  -	        MKL1	  uc010gyf.1	    ?	           ?	     ?	 T/G	    missense	  218191	  A	  C	    CG	  1	  2	   +123	   -889	  11/14	   8/11	  ?	  0	     598	    0	 AGC	 CGC	  S	  R	     598	    1	    B0QY83	B0QY83_HUMAN	   NP_065882	         ?	       ?	         ?	         ?	       ?
+chr22:40815256  	  -	        MKL1	  uc003ayv.1	    ?	 CCDS14003.1	     1	 C/T	    missense	   44183	  G	  A	    CC	  0	  1	   +879	   -133	   9/12	   9/12	  ?	  0	     396	    0	 GCC	 ACC	  A	  T	     396	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	rs34736200	     G/A	  0.047299	   0.14633	   A>A>A
+chr22:40815256  	  -	        MKL1	  uc003ayw.1	16752	 CCDS14003.1	     1	 C/T	    missense	  217435	  G	  A	    CC	  0	  1	   +879	   -133	  12/15	   9/12	  ?	  0	     396	    0	 GCC	 ACC	  A	  T	     396	    1	    Q969V6	  MKL1_HUMAN	   NP_065882	rs34736200	     G/A	  0.047299	   0.14633	   A>A>A
+chr22:40815256  	  -	        MKL1	  uc010gye.1	    ?	           ?	     ?	 C/T	    missense	  217435	  G	  A	    CC	  0	  1	   +879	   -133	  12/15	   9/12	  ?	  0	     396	    0	 GCC	 ACC	  A	  T	     396	    1	    E7ER32	E7ER32_HUMAN	           ?	rs34736200	     G/A	  0.047299	   0.14633	   A>A>A
+chr22:40815256  	  -	        MKL1	  uc010gyf.1	    ?	           ?	     ?	 C/T	    missense	  217435	  G	  A	    CC	  0	  1	   +879	   -133	  11/14	   8/11	  ?	  0	     346	    0	 GCC	 ACC	  A	  T	     346	    1	    B0QY83	B0QY83_HUMAN	   NP_065882	rs34736200	     G/A	  0.047299	   0.14633	   A>A>A
+## Totals:
+##   lines input               4
+##   lines skipped             0
+##   alleles annotated        17
+##     missense               17
+##     nonsense                0
+##     coding-synon            0
+##     intron                  0
+##     utr-3                   0
+##     utr-5                   0