Mercurial > repos > jjohnson > iedb_api
view iedb_api.py @ 0:7a9ecf229480 draft default tip
Uploaded
author | jjohnson |
---|---|
date | Mon, 28 Sep 2015 11:27:57 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python """ """ import sys import os.path import re import optparse import urllib import urllib2 from optparse import OptionParser mhci_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket'] mhcii_methods = ['recommended','consensus3','NetMHCIIpan','nn_align','smm_align','comblib','tepitope'] processing_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008'] mhcnp_methods = ['mhcnp'] bcell_methods = ['Bepipred','Chou-FasmanEmini','Karplus-Schulz','Kolaskar-Tongaonkar','Parker'] prediction_methods = {'mhci':mhci_methods,'mhcii':mhcii_methods,'processing':processing_methods,'mhcnp':mhcnp_methods,'bcell':bcell_methods} def warn_err(msg,exit_code=1): sys.stderr.write(msg) if exit_code: sys.exit(exit_code) def __main__(): #Parse Command Line parser = optparse.OptionParser() parser.add_option( '-p', '--prediction', dest='prediction', default='mhci', choices=['mhci','mhcii','processing','mhcnp','bcell'], help='IEDB API prediction service' ) parser.add_option( '-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence' ) parser.add_option( '-m', '--method', dest='method', default='recommended', choices=['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket' ], help='prediction method' ) parser.add_option( '-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions' ) parser.add_option( '-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele' ) parser.add_option( '-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)' ) parser.add_option( '-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file' ) parser.add_option( '-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file' ) parser.add_option( '-o', '--output', dest='output', default=None, help='Output file for query results' ) parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr' ) (options, args) = parser.parse_args() aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' if not options.allele and options.prediction != 'bcell': warn_err('-a allele required\n', exit_code=1) if not (options.sequence or options.input): warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1) if options.output != None: try: outputPath = os.path.abspath(options.output) outputFile = open(outputPath, 'w') except Exception, e: warn_err("Unable to open output file: %s\n" % e, exit_code=1) else: outputFile = sys.stdout url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction #TODO parse alleles from the options.alleles file alleles = ','.join(options.allele) lengths = ','.join(options.length) method = options.method results = [] global header header = None sequence_text = [] def add_seq(seqid,seq): sequence_text.append(">%s\n%s" % (seqid if seqid else "peptide%d" % len(sequence_text),seq)) def query(url,seq,allele,length,seqid=None,method='recommended'): global header params = dict() if method: params['method'] = method params['sequence_text'] = seq params['allele'] = allele params['length'] = length data = urllib.urlencode(params) request = urllib2.Request(url, data) if options.debug: print >> sys.stderr, "url %s %s %s" % (request.get_full_url(), seqid if seqid else "None", seq) response = None response = urllib2.urlopen(request) if response and response.getcode() == 200: resp_data = response.readlines() for line in resp_data: if line.find('eptide') > 0: header = "#%s%s" % ("ID\t" if seqid else "", line) continue if seqid: results.append("%s\t%s" % (seqid,line)) else: results.append(line) elif not response: warn_err("NO response from IEDB server\n", exit_code=3) else: warn_err("Error connecting to IEDB server\n", exit_code=response.getcode()) if options.sequence: for i,seq in enumerate(options.sequence): query(url,seq,alleles,lengths,seqid=None,method=method) if options.input: try: fh = open(options.input,'r') if options.column: ## tabular col = int(options.column) idcol = int(options.id_column) if options.id_column else None for i,line in enumerate(fh): fields = line.split('\t') if len(fields) > col: seq = re.sub('[_*]','',fields[col]) if re.match(aapat,seq): seqid = fields[idcol] if idcol != None and idcol < len(fields) else None query(url,seq,alleles,lengths,seqid=seqid,method=method) else: warn_err('Line %d, Not a peptide: %s\n' % (i,seq),exit_code=None) else: ## fasta seqid = None seq = '' for i,line in enumerate(fh): if line.startswith('>'): if seqid and len(seq) > 0: query(url,seq,alleles,lengths,seqid=seqid,method=method) seqid = line[1:].strip() seq = '' else: seq += line.strip() if seqid and len(seq) > 0: query(url,seq,alleles,lengths,seqid=seqid,method=method) fh.close() except Exception, e: warn_err("Unable to open input file: %s\n" % e, exit_code=1) if header: outputFile.write(header) for line in results: outputFile.write(line) if __name__ == "__main__": __main__()