comparison iedb_api.py @ 0:7a9ecf229480 draft default tip

Uploaded
author jjohnson
date Mon, 28 Sep 2015 11:27:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7a9ecf229480
1 #!/usr/bin/env python
2 """
3 """
4 import sys
5 import os.path
6 import re
7 import optparse
8 import urllib
9 import urllib2
10 from optparse import OptionParser
11
# Prediction methods accepted by each IEDB tools-API service.
# The first entry of each list is the one the service treats as its default
# choice in this wrapper ('recommended' where the service offers it).
mhci_methods = ['recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008', 'netmhccons', 'pickpocket']
mhcii_methods = ['recommended', 'consensus3', 'NetMHCIIpan', 'nn_align', 'smm_align', 'comblib', 'tepitope']
processing_methods = ['recommended', 'consensus', 'netmhcpan', 'ann', 'smmpmbec', 'smm', 'comblib_sidney2008']
mhcnp_methods = ['mhcnp']
# 'Chou-Fasman' and 'Emini' are two separate B-cell methods; they were
# previously fused into one bogus name by a missing comma.
bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz', 'Kolaskar-Tongaonkar', 'Parker']
# Lookup from the -p/--prediction service name to its list of methods.
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
18
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr, then exit with *exit_code* if it is truthy.

    Passing a falsy exit_code (None or 0) only emits the message and
    returns, which lets callers use this for non-fatal warnings.
    """
    sys.stderr.write(msg)
    if not exit_code:
        return
    sys.exit(exit_code)
23
24
# Accepts a string made only of amino-acid one-letter codes.
_AA_PATTERN = re.compile('^[ABCDEFGHIKLMNPQRSTVWY]+$')


def _tabular_peptides(lines, col, idcol=None):
    """Yield (seqid, peptide) pairs from tab-separated *lines*.

    col   -- 0-based index of the peptide column
    idcol -- 0-based index of the ID column, or None for no IDs

    Lines with too few columns are skipped silently; values that are not
    plain amino-acid strings (after removing '_' and '*') are reported on
    stderr with their 1-based line number and skipped.
    """
    for lineno, line in enumerate(lines, 1):
        # Strip the line terminator so the last column does not carry it
        # into the request or the output.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) <= col:
            continue
        seq = re.sub('[_*]', '', fields[col])
        if not _AA_PATTERN.match(seq):
            warn_err('Line %d, Not a peptide: %s\n' % (lineno, seq), exit_code=None)
            continue
        seqid = fields[idcol] if idcol is not None and idcol < len(fields) else None
        yield seqid, seq


def _fasta_peptides(lines):
    """Yield (seqid, sequence) pairs from FASTA-formatted *lines*.

    Records with an empty ID or an empty sequence are skipped.
    """
    seqid = None
    chunks = []
    for line in lines:
        if line.startswith('>'):
            seq = ''.join(chunks)
            if seqid and seq:
                yield seqid, seq
            seqid = line[1:].strip()
            chunks = []
        else:
            chunks.append(line.strip())
    seq = ''.join(chunks)
    if seqid and seq:
        yield seqid, seq


def __main__():
    """Command-line entry point: query an IEDB prediction service.

    Peptides come from -s options and/or an input file (-i), which is read
    as tabular when -c is given and as FASTA otherwise. One request is sent
    per peptide; the header line returned by the service is written once,
    followed by all result lines, to -o or stdout.

    Exits through warn_err() on missing arguments, unreadable files, or a
    failed request.
    """
    # Parse Command Line
    predictions = ['mhci', 'mhcii', 'processing', 'mhcnp', 'bcell']
    # Offer every method known for any service, not just the MHC-I ones,
    # so that e.g. '-p mhcii -m NetMHCIIpan' is accepted.
    methods = []
    for prediction in predictions:
        for name in prediction_methods[prediction]:
            if name not in methods:
                methods.append(name)

    parser = optparse.OptionParser()
    parser.add_option('-p', '--prediction', dest='prediction', default='mhci', choices=predictions, help='IEDB API prediction service')
    parser.add_option('-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence')
    parser.add_option('-m', '--method', dest='method', default='recommended', choices=methods, help='prediction method')
    parser.add_option('-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions')
    parser.add_option('-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele')
    parser.add_option('-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)')
    parser.add_option('-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file')
    parser.add_option('-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()

    if not options.allele and options.prediction != 'bcell':
        warn_err('-a allele required\n', exit_code=1)

    if not (options.sequence or options.input):
        warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)

    # Validate column numbers up front so a typo is reported as such
    # instead of surfacing later as an input-file error.
    col = None
    idcol = None
    if options.column:
        try:
            col = int(options.column)
            idcol = int(options.id_column) if options.id_column else None
        except ValueError as e:
            warn_err("Invalid column number: %s\n" % e, exit_code=1)

    if options.output is not None:
        try:
            outputPath = os.path.abspath(options.output)
            outputFile = open(outputPath, 'w')
        except (IOError, OSError) as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction

    #TODO parse alleles from the options.alleles file
    alleles = ','.join(options.allele)
    lengths = ','.join(options.length)
    method = options.method

    results = []
    # Mutable holder for the header line; Python 2 has no 'nonlocal'.
    state = {'header': None}

    def query(url, seq, allele, length, seqid=None, method='recommended'):
        """POST one peptide to the service and collect its result lines."""
        params = dict()
        if method:
            params['method'] = method
        params['sequence_text'] = seq
        params['allele'] = allele
        params['length'] = length
        request = urllib2.Request(url, urllib.urlencode(params))
        if options.debug:
            sys.stderr.write("url %s %s %s\n" % (request.get_full_url(), seqid if seqid else "None", seq))
        try:
            response = urllib2.urlopen(request)
        except urllib2.URLError as e:
            # Covers HTTPError too (a URLError subclass).
            warn_err("Error connecting to IEDB server: %s\n" % e, exit_code=3)
            return
        if not response:
            warn_err("NO response from IEDB server\n", exit_code=3)
            return
        try:
            if response.getcode() != 200:
                warn_err("Error connecting to IEDB server\n", exit_code=response.getcode())
                return
            for line in response.readlines():
                # The column-header line contains 'peptide'/'Peptide';
                # keep the latest one and do not treat it as a result.
                if line.find('eptide') > 0:
                    state['header'] = "#%s%s" % ("ID\t" if seqid else "", line)
                    continue
                if seqid:
                    results.append("%s\t%s" % (seqid, line))
                else:
                    results.append(line)
        finally:
            response.close()

    try:
        if options.sequence:
            for seq in options.sequence:
                query(url, seq, alleles, lengths, seqid=None, method=method)

        if options.input:
            # Only the open() is guarded here, so request failures are not
            # misreported as input-file problems.
            try:
                fh = open(options.input, 'r')
            except (IOError, OSError) as e:
                warn_err("Unable to open input file: %s\n" % e, exit_code=1)
                fh = None
            if fh is not None:
                try:
                    if options.column:  ## tabular
                        peptides = _tabular_peptides(fh, col, idcol)
                    else:  ## fasta
                        peptides = _fasta_peptides(fh)
                    for seqid, seq in peptides:
                        query(url, seq, alleles, lengths, seqid=seqid, method=method)
                finally:
                    fh.close()

        if state['header']:
            outputFile.write(state['header'])
        outputFile.writelines(results)
    finally:
        if outputFile is not sys.stdout:
            outputFile.close()
140
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()
142