0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 """
|
|
4 import sys
|
|
5 import os.path
|
|
6 import re
|
|
7 import optparse
|
|
8 import urllib
|
|
9 import urllib2
|
|
10 from optparse import OptionParser
|
|
11
|
|
# Method names listed for each prediction service.  The keys of
# ``prediction_methods`` are the service names accepted by the ``-p`` option
# and substituted into the request URL.
mhci_methods = [
    'recommended',
    'consensus',
    'netmhcpan',
    'ann',
    'smmpmbec',
    'smm',
    'comblib_sidney2008',
    'netmhccons',
    'pickpocket',
]
mhcii_methods = [
    'recommended',
    'consensus3',
    'NetMHCIIpan',
    'nn_align',
    'smm_align',
    'comblib',
    'tepitope',
]
processing_methods = [
    'recommended',
    'consensus',
    'netmhcpan',
    'ann',
    'smmpmbec',
    'smm',
    'comblib_sidney2008',
]
mhcnp_methods = ['mhcnp']
# 'Chou-Fasman' and 'Emini' are two separate methods; they were previously
# fused into the single bogus entry 'Chou-FasmanEmini' by a missing comma.
bcell_methods = [
    'Bepipred',
    'Chou-Fasman',
    'Emini',
    'Karplus-Schulz',
    'Kolaskar-Tongaonkar',
    'Parker',
]
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
|
|
18
|
|
def warn_err(msg, exit_code=1):
    """Write *msg* to standard error, then exit unless *exit_code* is falsy.

    A falsy ``exit_code`` (``None`` or ``0``) turns the call into a plain
    warning: the message is written and control returns to the caller.
    Any other value is handed to ``sys.exit``.
    """
    sys.stderr.write(msg)
    if not exit_code:
        return
    sys.exit(exit_code)
|
|
23
|
|
24
|
|
def __main__():
    """Query an IEDB tools-API prediction service from the command line.

    Peptides come from repeated ``-s`` options and/or an ``-i`` input file
    (tab-separated when ``-c`` names the peptide column, FASTA otherwise).
    Each peptide is POSTed on its own to
    ``http://tools-api.iedb.org/tools_api/<prediction>/``.  The response
    lines are collected and written, after the header line taken from the
    responses, to the ``-o`` file or to standard output.

    Exits through ``warn_err``: status 1 for missing or malformed options
    and for files that cannot be opened, 3 when the server cannot be
    reached, and the HTTP status code when the server answers with anything
    other than 200.
    """
    # Offer the methods of every service.  The option used to be limited to
    # the mhci list, which made the methods of the other services unusable.
    method_choices = sorted(set(
        name for names in prediction_methods.values() for name in names))

    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-p', '--prediction', dest='prediction', default='mhci',
                      choices=['mhci', 'mhcii', 'processing', 'mhcnp', 'bcell'],
                      help='IEDB API prediction service')
    parser.add_option('-s', '--sequence', dest='sequence', action="append",
                      default=None, help='Peptide Sequence')
    parser.add_option('-m', '--method', dest='method', default='recommended',
                      choices=method_choices, help='prediction method')
    parser.add_option('-a', '--allele', dest='allele', action="append",
                      default=[], help='Alleles for which to make predictions')
    parser.add_option('-l', '--length', dest='length', action="append",
                      default=[],
                      choices=['8', '9', '10', '11', '12', '13', '14', '15'],
                      help='lengths for which to make predictions, 1 per allele')
    parser.add_option('-i', '--input', dest='input', default=None,
                      help='Input file for peptide sequences (fasta or tabular)')
    parser.add_option('-c', '--column', dest='column', default=None,
                      help='Peptide Column in a tabular input file')
    parser.add_option('-C', '--id_column', dest='id_column', default=None,
                      help='ID Column in a tabular input file')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help='Output file for query results')
    parser.add_option('-d', '--debug', dest='debug', action='store_true',
                      default=False, help='Turn on wrapper debugging to stderr')
    options, _args = parser.parse_args()

    # Compiled once; applied to every candidate peptide of a tabular input.
    peptide_re = re.compile('^[ABCDEFGHIKLMNPQRSTVWY]+$')

    if not options.allele and options.prediction != 'bcell':
        warn_err('-a allele required\n', exit_code=1)

    if not (options.sequence or options.input):
        warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)

    # Validate the column numbers up front so that a bad value is reported
    # as such, and before the output file gets created.
    col = None
    idcol = None
    if options.input and options.column:
        try:
            col = int(options.column)
            if options.id_column:
                idcol = int(options.id_column)
        except ValueError as e:
            warn_err('-c column and -C id_column must be integers: %s\n' % e, exit_code=1)

    if options.output is not None:
        try:
            outputFile = open(os.path.abspath(options.output), 'w')
        except (IOError, OSError) as e:
            warn_err("Unable to open output file: %s\n" % e, exit_code=1)
    else:
        outputFile = sys.stdout

    url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction

    # TODO parse alleles from the options.alleles file
    alleles = ','.join(options.allele)
    lengths = ','.join(options.length)
    method = options.method

    results = []
    # Mutable holder so the nested query() can record the header line
    # without resorting to a module-level global.
    state = {'header': None}

    def query(url, seq, allele, length, seqid=None, method='recommended'):
        """POST one peptide and append the response lines to ``results``."""
        params = {'sequence_text': seq}
        # Only send the parameters that were actually supplied.
        if method:
            params['method'] = method
        if allele:
            params['allele'] = allele
        if length:
            params['length'] = length
        request = urllib2.Request(url, urllib.urlencode(params))
        if options.debug:
            sys.stderr.write("url %s %s %s\n" % (
                request.get_full_url(), seqid if seqid else "None", seq))
        # urlopen signals failures by raising, not by returning a response
        # with an error status.  HTTPError derives from URLError, so it has
        # to be caught first.
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            warn_err("Error connecting to IEDB server: HTTP %s\n" % e.code, exit_code=e.code)
        except urllib2.URLError as e:
            warn_err("NO response from IEDB server: %s\n" % e.reason, exit_code=3)
        try:
            status = response.getcode()
            lines = response.readlines() if status == 200 else []
        finally:
            response.close()
        if status != 200:
            warn_err("Error connecting to IEDB server\n", exit_code=status)
        for line in lines:
            # A line mentioning "Peptide"/"peptide" past its first character
            # is taken to be the column header of the result table.
            if line.find('eptide') > 0:
                state['header'] = "#%s%s" % ("ID\t" if seqid else "", line)
                continue
            if seqid:
                results.append("%s\t%s" % (seqid, line))
            else:
                results.append(line)

    try:
        for seq in options.sequence or []:
            query(url, seq, alleles, lengths, seqid=None, method=method)

        if options.input:
            # Only the open() itself is reported as an input-file problem;
            # errors raised while querying keep their own diagnostics.
            try:
                fh = open(options.input, 'r')
            except (IOError, OSError) as e:
                warn_err("Unable to open input file: %s\n" % e, exit_code=1)
            try:
                if col is not None:  # tabular
                    for lineno, line in enumerate(fh, 1):
                        # Drop the line ending so the last column does not
                        # carry a trailing newline into the peptide or id.
                        fields = line.rstrip('\r\n').split('\t')
                        if len(fields) <= col:
                            continue
                        seq = re.sub('[_*]', '', fields[col])
                        if not peptide_re.match(seq):
                            warn_err('Line %d, Not a peptide: %s\n' % (lineno, seq), exit_code=None)
                            continue
                        if idcol is not None and idcol < len(fields):
                            seqid = fields[idcol]
                        else:
                            seqid = None
                        query(url, seq, alleles, lengths, seqid=seqid, method=method)
                else:  # fasta
                    seqid = None
                    seq = ''
                    for line in fh:
                        if line.startswith('>'):
                            # A new record starts: submit the finished one.
                            if seqid and len(seq) > 0:
                                query(url, seq, alleles, lengths, seqid=seqid, method=method)
                            seqid = line[1:].strip()
                            seq = ''
                        else:
                            seq += line.strip()
                    # Submit the last record of the file.
                    if seqid and len(seq) > 0:
                        query(url, seq, alleles, lengths, seqid=seqid, method=method)
            finally:
                fh.close()

        if state['header']:
            outputFile.write(state['header'])
        outputFile.writelines(results)
    finally:
        # Never close the interpreter's own stdout.
        if outputFile is not sys.stdout:
            outputFile.close()
|
|
140
|
|
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()
|
|
142
|