annotate iedb_api.py @ 0:7a9ecf229480 draft default tip

Uploaded
author jjohnson
date Mon, 28 Sep 2015 11:27:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
2 """
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
3 """
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
4 import sys
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
5 import os.path
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
6 import re
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
7 import optparse
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
8 import urllib
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
9 import urllib2
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
10 from optparse import OptionParser
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
11
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
# Method names offered for each IEDB tools-api prediction service.
# The keys of ``prediction_methods`` are the service names that are
# interpolated into the request URL.
mhci_methods = [
    'recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008', 'netmhccons', 'pickpocket',
]
mhcii_methods = [
    'recommended', 'consensus3', 'NetMHCIIpan', 'nn_align', 'smm_align',
    'comblib', 'tepitope',
]
processing_methods = [
    'recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008',
]
mhcnp_methods = ['mhcnp']
# 'Chou-Fasman' and 'Emini' are two separate methods; they were previously
# fused into the single bogus entry 'Chou-FasmanEmini' by a missing comma.
bcell_methods = [
    'Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
    'Kolaskar-Tongaonkar', 'Parker',
]
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
18
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr and optionally terminate the program.

    msg       -- text written verbatim to ``sys.stderr``; no newline is
                 appended, so callers include their own.
    exit_code -- when truthy, the process exits with this status via
                 ``sys.exit``.  A falsy value (``None`` or ``0``) turns
                 the call into a plain warning that returns normally.
    """
    sys.stderr.write(msg)
    if exit_code:
        sys.exit(exit_code)
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
23
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
24
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
def __main__():
    """Run IEDB tools-api predictions for peptides named on the command line.

    Peptides come from repeated ``-s`` options and/or from the ``-i`` input
    file, which is read as tab-separated text when ``-c`` is given and as
    FASTA otherwise.  The ``-c``/``-C`` values are used directly as indices
    into the split line.  One POST request per peptide is sent to
    ``http://tools-api.iedb.org/tools_api/<prediction>/``.  The response
    rows are collected and written, after a single ``#``-prefixed header
    line, to the ``-o`` file or to stdout.

    Written for Python 2: it relies on ``urllib.urlencode`` and ``urllib2``.
    Bad arguments, unreadable files and server errors terminate the program
    through ``warn_err``.
    """
    # Parse Command Line
    predictions = ['mhci', 'mhcii', 'processing', 'mhcnp', 'bcell']
    # Accept every method known for any service (first-seen order, no
    # duplicates); the per-service check happens after parsing.  Previously
    # only the MHC-I names were accepted, which made the MHC-II, mhcnp and
    # B-cell methods unselectable.
    method_choices = []
    for prediction in predictions:
        for name in prediction_methods[prediction]:
            if name not in method_choices:
                method_choices.append(name)

    parser = optparse.OptionParser()
    parser.add_option('-p', '--prediction', dest='prediction', default='mhci',
                      choices=predictions,
                      help='IEDB API prediction service')
    parser.add_option('-s', '--sequence', dest='sequence', action="append",
                      default=None, help='Peptide Sequence')
    parser.add_option('-m', '--method', dest='method', default=None,
                      choices=method_choices, help='prediction method')
    parser.add_option('-a', '--allele', dest='allele', action="append",
                      default=[],
                      help='Alleles for which to make predictions')
    parser.add_option('-l', '--length', dest='length', action="append",
                      default=[],
                      choices=['8', '9', '10', '11', '12', '13', '14', '15'],
                      help='lengths for which to make predictions, 1 per allele')
    parser.add_option('-i', '--input', dest='input', default=None,
                      help='Input file for peptide sequences (fasta or tabular)')
    parser.add_option('-c', '--column', dest='column', default=None,
                      help='Peptide Column in a tabular input file')
    parser.add_option('-C', '--id_column', dest='id_column', default=None,
                      help='ID Column in a tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help='Output file for query results')
    parser.add_option('-d', '--debug', dest='debug', action='store_true',
                      default=False,
                      help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()

    # Only strings made up entirely of these letters are sent to the server.
    aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'

    if not options.allele and options.prediction != 'bcell':
        warn_err('-a allele required\n', exit_code=1)

    if not (options.sequence or options.input):
        warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)

    # Resolve the method against the table for the chosen service.
    valid_methods = prediction_methods[options.prediction]
    if options.method is None:
        # Keep 'recommended' as the default wherever the service lists it;
        # otherwise fall back to the first method the service does list.
        if 'recommended' in valid_methods:
            method = 'recommended'
        else:
            method = valid_methods[0]
    else:
        method = options.method
        if method not in valid_methods:
            # Non-fatal: the request is still sent and the server decides.
            warn_err("Warning: method '%s' is not listed for prediction '%s'\n"
                     % (method, options.prediction), exit_code=None)

    # Open the output first so an unwritable path fails before any request.
    if options.output is not None:
        try:
            outputPath = os.path.abspath(options.output)
            outputFile = open(outputPath, 'w')
        except (IOError, OSError) as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction

    # TODO parse alleles from the options.alleles file
    alleles = ','.join(options.allele)
    lengths = ','.join(options.length)

    results = []
    # Mutable holder so the nested query() can record the header line
    # without a module-level global.
    state = {'header': None}

    def query(url, seq, allele, length, seqid=None, method='recommended'):
        """POST one peptide to *url* and append the response rows to results.

        The response line containing 'eptide' is kept as the output header
        instead of a result row.  When *seqid* is given it is prepended,
        tab-separated, to every result row (and 'ID' to the header).
        """
        params = {'sequence_text': seq, 'allele': allele, 'length': length}
        if method:
            params['method'] = method
        data = urllib.urlencode(params)
        request = urllib2.Request(url, data)
        if options.debug:
            sys.stderr.write("url %s %s %s\n" % (
                request.get_full_url(), seqid if seqid else "None", seq))
        response = None
        try:
            response = urllib2.urlopen(request)
        except urllib2.URLError as e:
            # urlopen raises for HTTP error statuses and network failures;
            # report them as server errors rather than a traceback.
            warn_err("Error connecting to IEDB server: %s\n" % e, exit_code=3)
        if response and response.getcode() == 200:
            try:
                resp_data = response.readlines()
            finally:
                response.close()
            for line in resp_data:
                # Rows are concatenated verbatim on output, so make sure a
                # final line without a newline cannot merge with the next.
                if not line.endswith('\n'):
                    line += '\n'
                # '> 0' (not '>= 0'): matches 'peptide' or 'Peptide'
                # anywhere in the line.
                if line.find('eptide') > 0:
                    state['header'] = "#%s%s" % ("ID\t" if seqid else "", line)
                    continue
                if seqid:
                    results.append("%s\t%s" % (seqid, line))
                else:
                    results.append(line)
        elif not response:
            warn_err("NO response from IEDB server\n", exit_code=3)
        else:
            warn_err("Error connecting to IEDB server\n", exit_code=response.getcode())

    if options.sequence:
        for seq in options.sequence:
            query(url, seq, alleles, lengths, seqid=None, method=method)

    if options.input:
        col = None
        idcol = None
        if options.column:  # tabular
            # Validate the columns here so a bad value is not reported as
            # a file-open failure.
            try:
                col = int(options.column)
                idcol = int(options.id_column) if options.id_column else None
            except ValueError as e:
                warn_err("Invalid column number: %s\n" % e, exit_code=1)
        try:
            with open(options.input, 'r') as fh:
                if col is not None:  # tabular
                    for lineno, line in enumerate(fh, 1):
                        # Drop the line ending so a peptide or ID in the
                        # last column does not carry it along.
                        fields = line.rstrip('\r\n').split('\t')
                        if len(fields) <= col:
                            continue
                        seq = re.sub('[_*]', '', fields[col])
                        if re.match(aapat, seq):
                            if idcol is not None and idcol < len(fields):
                                seqid = fields[idcol]
                            else:
                                seqid = None
                            query(url, seq, alleles, lengths, seqid=seqid, method=method)
                        else:
                            warn_err('Line %d, Not a peptide: %s\n' % (lineno, seq), exit_code=None)
                else:  # fasta
                    seqid = None
                    seq = ''
                    for line in fh:
                        if line.startswith('>'):
                            # A new header flushes the record read so far.
                            if seqid and len(seq) > 0:
                                query(url, seq, alleles, lengths, seqid=seqid, method=method)
                            seqid = line[1:].strip()
                            seq = ''
                        else:
                            seq += line.strip()
                    # Flush the final record.
                    if seqid and len(seq) > 0:
                        query(url, seq, alleles, lengths, seqid=seqid, method=method)
        except (IOError, OSError) as e:
            warn_err("Unable to open input file: %s\n" % e, exit_code=1)

    try:
        if state['header']:
            outputFile.write(state['header'])
        for line in results:
            outputFile.write(line)
    finally:
        # Never close stdout; only a file this function opened itself.
        if outputFile is not sys.stdout:
            outputFile.close()
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
140
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()
7a9ecf229480 Uploaded
jjohnson
parents:
diff changeset
142