changeset 0:7a9ecf229480 draft default tip

Uploaded
author jjohnson
date Mon, 28 Sep 2015 11:27:57 -0400
parents
children
files iedb_api.py iedb_api.xml test-data/alleles.tsv test-data/seqs.fa test-data/seqs.tsv
diffstat 5 files changed, 385 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iedb_api.py	Mon Sep 28 11:27:57 2015 -0400
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+"""
+"""
+import sys
+import os.path
+import re
+import optparse
+import urllib
+import urllib2
+from optparse import OptionParser
+
+mhci_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket']
+mhcii_methods = ['recommended','consensus3','NetMHCIIpan','nn_align','smm_align','comblib','tepitope']
+processing_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008']
+mhcnp_methods = ['mhcnp']
+bcell_methods = ['Bepipred','Chou-FasmanEmini','Karplus-Schulz','Kolaskar-Tongaonkar','Parker']
+prediction_methods = {'mhci':mhci_methods,'mhcii':mhcii_methods,'processing':processing_methods,'mhcnp':mhcnp_methods,'bcell':bcell_methods}
+
+def warn_err(msg,exit_code=1):
+  sys.stderr.write(msg)
+  if exit_code:
+    sys.exit(exit_code)
+
+
+def __main__():
+  #Parse Command Line
+  parser = optparse.OptionParser()
+  parser.add_option( '-p', '--prediction', dest='prediction', default='mhci', choices=['mhci','mhcii','processing','mhcnp','bcell'], help='IEDB API prediction service' )
+  parser.add_option( '-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence' )
+  parser.add_option( '-m', '--method', dest='method', default='recommended', choices=['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket' ], help='prediction method' )
+  parser.add_option( '-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions' )
+  parser.add_option( '-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele' )
+  parser.add_option( '-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)' )
+  parser.add_option( '-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file' )
+  parser.add_option( '-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file' )
+  parser.add_option( '-o', '--output', dest='output', default=None, help='Output file for query results' )
+  parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr'  )
+  (options, args) = parser.parse_args()
+
+  aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'          
+
+  if not options.allele and options.prediction != 'bcell':
+    warn_err('-a allele required\n', exit_code=1)
+
+  if not (options.sequence or options.input): 
+    warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1)
+
+  if options.output != None:
+    try:
+      outputPath = os.path.abspath(options.output)
+      outputFile = open(outputPath, 'w')
+    except Exception, e:
+      warn_err("Unable to open output file: %s\n" % e, exit_code=1)
+  else:
+    outputFile = sys.stdout
+
+  url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction
+
+  #TODO parse alleles from the options.alleles file
+  alleles = ','.join(options.allele)
+  lengths = ','.join(options.length)
+  method = options.method
+
+  results = []
+  global header
+  header = None
+
+  sequence_text = []
+  def add_seq(seqid,seq):
+    sequence_text.append(">%s\n%s" % (seqid if seqid else "peptide%d" % len(sequence_text),seq))
+
+  def query(url,seq,allele,length,seqid=None,method='recommended'):
+    global header
+    params = dict()
+    if method:
+      params['method'] = method
+    params['sequence_text'] = seq
+    params['allele'] = allele
+    params['length'] = length
+    data = urllib.urlencode(params)
+    request = urllib2.Request(url, data)
+    if options.debug:
+      print >> sys.stderr, "url %s %s %s" % (request.get_full_url(), seqid if seqid else "None", seq)
+    response = None
+    response = urllib2.urlopen(request)
+    if response and response.getcode() == 200:
+      resp_data = response.readlines()
+      for line in resp_data:
+        if line.find('eptide') > 0:
+          header = "#%s%s" % ("ID\t" if seqid else "", line)
+          continue
+        if seqid:
+          results.append("%s\t%s" % (seqid,line))
+        else:
+          results.append(line)
+    elif not response:
+      warn_err("NO response from IEDB server\n",  exit_code=3)
+    else:
+      warn_err("Error connecting to IEDB server\n",  exit_code=response.getcode())
+
+  if options.sequence:
+    for i,seq in enumerate(options.sequence):
+      query(url,seq,alleles,lengths,seqid=None,method=method)
+  if options.input:
+    try:
+      fh = open(options.input,'r')
+      if options.column: ## tabular
+        col = int(options.column)
+        idcol = int(options.id_column) if options.id_column else None
+        for i,line in enumerate(fh):
+          fields = line.split('\t')
+          if len(fields) > col:
+            seq = re.sub('[_*]','',fields[col])
+            if re.match(aapat,seq):
+              seqid = fields[idcol] if idcol != None and idcol < len(fields) else None
+              query(url,seq,alleles,lengths,seqid=seqid,method=method)
+            else:
+              warn_err('Line %d, Not a peptide: %s\n' % (i,seq),exit_code=None)
+      else:  ## fasta
+        seqid = None
+        seq = ''
+        for i,line in enumerate(fh):
+          if line.startswith('>'):
+            if seqid and len(seq) > 0:
+              query(url,seq,alleles,lengths,seqid=seqid,method=method)
+            seqid = line[1:].strip()
+            seq = ''
+          else:
+            seq += line.strip()
+        if seqid and len(seq) > 0:
+          query(url,seq,alleles,lengths,seqid=seqid,method=method)
+      fh.close()
+    except Exception, e:
+      warn_err("Unable to open input file: %s\n" % e, exit_code=1)
+
+  if header:
+    outputFile.write(header)  
+  for line in results:
+    outputFile.write(line)  
+
+if __name__ == "__main__": __main__()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iedb_api.xml	Mon Sep 28 11:27:57 2015 -0400
@@ -0,0 +1,235 @@
+<tool id="iedb_api" name="IEDB" version="0.1.0">
+    <description>MHC Binding prediction</description>
+    <requirements>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command interpreter="python"><![CDATA[
+        ## Values are single-quoted so the shell passes them through unchanged
+        ## (allele names contain the glob character '*').
+        #import re
+        iedb_api.py --prediction='$prediction.tool' --method='$prediction.method'
+        #if $sequence.seqsrc == 'fasta':
+          -i '$sequence.seq_fasta'
+        #else if $sequence.seqsrc == 'tabular':
+          -i '$sequence.seq_tsv'
+          ## The selected column numbers are reduced by one because iedb_api.py indexes the split fields from zero.
+          -c #echo int(str($sequence.pep_col)) - 1
+          #if $sequence.id_col:
+            -C #echo  int(str($sequence.id_col)) - 1
+          #end if
+        #else:
+          #for $seq in str($sequence.seq_text).strip().split():
+            -s '$seq.strip()'
+          #end for
+        #end if
+        ## Each allele is emitted once per peptide length, as an -a/-l pair; an allele
+        ## without its own lengths is paired with every length selected in $lengths.
+        #if $alleles.allelesrc == 'history':
+          #for $line in open(str($alleles.allele_file)):
+            #set $fields = $line.strip().split(',')
+            #set $allele = $fields[0].strip()
+            #if len($allele) > 0:
+              #if len($fields) > 1:
+                #for $alen in $fields[1:]:
+                  -a '$allele' -l '$alen'
+                #end for
+              #else:
+                #for $alen in str($lengths).split(','):
+                  -a '$allele' -l '$alen'
+                #end for
+              #end if
+            #end if
+          #end for
+        #else:
+          #for $word in str($alleles.allele_text).strip().split():
+            #set $fields = $word.strip().split(',')
+            #set $allele = $fields[0].strip()
+            #if len($allele) > 0:
+              #if len($fields) > 1:
+                #for $alen in $fields[1:]:
+                  -a '$allele' -l '$alen'
+                #end for
+              #else:
+                #for $alen in str($lengths).split(','):
+                  -a '$allele' -l '$alen'
+                #end for
+              #end if
+            #end if
+          #end for
+        #end if
+        -o '$output'
+    ]]></command>
+    <inputs>
+        <!-- Source of the peptide sequences: a fasta dataset, a column of a tabular dataset, or typed text -->
+        <conditional name="sequence">
+           <param name="seqsrc" type="select" label="Peptide sequences">
+               <option value="fasta">Fasta file</option>
+               <option value="tabular">From tabular</option>
+               <option value="entry">Entered</option>
+           </param>
+           <when value="fasta">
+               <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
+           </when>
+           <when value="tabular">
+               <param name="seq_tsv" type="data" format="tabular" label="Peptide Sequence Tabular"/>
+               <param name="pep_col" label="Select column with peptides" type="data_column" numerical="false" data_ref="seq_tsv" />
+               <param name="id_col" label="Select column with name" type="data_column" numerical="false" data_ref="seq_tsv" optional="true"/>
+           </when>
+           <when value="entry">
+               <param name="seq_text" type="text" size="80" label="Peptide Sequence"/>
+           </when>
+        </conditional>
+        <!-- Source of the alleles: a dataset with one allele per line, or typed text -->
+        <conditional name="alleles">
+           <param name="allelesrc" type="select" label="Alleles">
+               <option value="history">From history</option>
+               <option value="entry">Entered</option>
+           </param>
+           <when value="history">
+               <param name="allele_file" type="data" format="txt" label="Alleles file">
+                   <help>The dataset should have one allele per line. The allele may be followed by an optional comma-separated list of peptide lengths, e.g.: HLA-A*02:01,8,9</help>
+               </param>
+           </when>
+           <when value="entry">
+               <param name="allele_text" type="text" size="80" label="Alleles">
+                   <help>Enter alleles separated by white space: HLA-A*03:01  HLA-B*07:02  (The peptide lengths may follow each allele: HLA-A*03:01,8,9,10  HLA-B*07:02,9)</help>
+                   <validator type="regex" message="Alleles separated by white space, each optionally followed by comma-separated peptide lengths">^(HLA-([A-C]|D[PQR][AB]1)\*[0-9][0-9]:[0-9][0-9](,(8|9|10|11|12|13|14|15))*)(\s+HLA-([A-C]|D[PQR][AB]1)\*[0-9][0-9]:[0-9][0-9](,(8|9|10|11|12|13|14|15))*)*$</validator>
+               </param>
+           </when>
+        </conditional>
+        <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction">
+            <help>Used for any alleles which don't include specified lengths</help>
+            <option value="8">8</option>
+            <option value="9">9</option>
+            <option value="10">10</option>
+            <option value="11">11</option>
+            <option value="12">12</option>
+            <option value="13">13</option>
+            <option value="14">14</option>
+            <option value="15">15</option>
+        </param>
+        <!-- Prediction service; each service offers its own list of methods -->
+        <conditional name="prediction">
+           <param name="tool" type="select" label="Prediction">
+               <option value="mhci">MHC-I Binding</option>
+               <option value="mhcii">MHC-II Binding</option>
+               <option value="processing">MHC-I Processing</option>
+               <option value="mhcnp">MHC-NP T-Cell Epitope</option>
+               <option value="bcell">Antibody Epitope Prediction</option>
+           </param>
+           <when value="mhci">
+               <param name="method" type="select" label="prediction method">
+                   <option value="recommended" selected="true">recommended</option>
+                   <option value="consensus">consensus</option>
+                   <option value="netmhcpan">netmhcpan</option>
+                   <option value="ann">ann</option>
+                   <option value="smmpmbec">smmpmbec</option>
+                   <option value="smm">smm</option>
+                   <option value="comblib_sidney2008">comblib_sidney2008</option>
+                   <option value="netmhccons">netmhccons</option>
+                   <option value="pickpocket">pickpocket</option>
+               </param>
+           </when>
+           <when value="mhcii">
+               <param name="method" type="select" label="prediction method">
+                   <option value="recommended" selected="true">recommended</option>
+                   <option value="consensus3">consensus3</option>
+                   <option value="NetMHCIIpan">NetMHCIIpan</option>
+                   <option value="nn_align">nn_align</option>
+                   <option value="smm_align">smm_align</option>
+                   <option value="comblib">comblib</option>
+                   <option value="tepitope">tepitope</option>
+               </param>
+           </when>
+           <when value="processing">
+               <param name="method" type="select" label="prediction method">
+                   <option value="recommended" selected="true">recommended</option>
+                   <option value="consensus">consensus</option>
+                   <option value="netmhcpan">netmhcpan</option>
+                   <option value="ann">ann</option>
+                   <option value="smmpmbec">smmpmbec</option>
+                   <option value="smm">smm</option>
+                   <option value="comblib_sidney2008">comblib_sidney2008</option>
+               </param>
+           </when>
+           <when value="mhcnp">
+               <param name="method" type="select" label="prediction method">
+                   <option value="mhcnp" selected="true">mhcnp</option>
+               </param>
+           </when>
+           <when value="bcell">
+               <param name="method" type="select" label="prediction method">
+                   <option value="Bepipred" selected="true">Bepipred</option>
+                   <option value="Chou-Fasman">Chou-Fasman</option>
+                   <option value="Emini">Emini</option>
+                   <option value="Karplus-Schulz">Karplus-Schulz</option>
+                   <option value="Kolaskar-Tongaonkar">Kolaskar-Tongaonkar</option>
+                   <option value="Parker">Parker</option>
+               </param>
+               <param name="window_size" type="integer" value="" optional="true" min="1" label="window_size" help="window_size should be less than the sequence length, and less than 8 for Karplus-Schulz method"/>
+           </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- Sequence and allele entered as text -->
+        <test>
+            <param name="seqsrc" value="entry"/>
+            <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
+            <param name="allelesrc" value="entry"/>
+            <param name="allele_text" value="HLA-A*01:01,9"/>
+            <param name="tool" value="mhci"/>
+            <param name="method" value="recommended"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="LYNTVATLY" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Sequences from a fasta dataset, alleles from test-data/alleles.tsv -->
+        <test>
+            <param name="seqsrc" value="fasta"/>
+            <param name="seq_fasta" ftype="fasta" value="seqs.fa"/>
+            <param name="allelesrc" value="history"/>
+            <param name="allele_file" ftype="txt" value="alleles.tsv"/>
+            <param name="tool" value="mhci"/>
+            <param name="method" value="recommended"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="peptide1" />
+                    <has_text text="AHKVPRRLLK" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Sequences from a tabular dataset (peptide in column 3, ID in column 1), alleles from test-data/alleles.tsv -->
+        <test>
+            <param name="seqsrc" value="tabular"/>
+            <param name="seq_tsv" ftype="tabular" value="seqs.tsv"/>
+            <param name="pep_col" value="3"/>
+            <param name="id_col" value="1"/>
+            <param name="allelesrc" value="history"/>
+            <param name="allele_file" ftype="txt" value="alleles.tsv"/>
+            <param name="tool" value="mhci"/>
+            <param name="method" value="recommended"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="peptide1" />
+                    <has_text text="AHKVPRRLLK" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species. 
+
+This tool retrieves epitope information about input peptide sequences by using the RESTful web services provided by IEDB.  
+The webservices are described at:  http://tools.immuneepitope.org/main/tools-api/
+
+**INPUTS**
+
+  peptide sequences from a fasta file or a column in a tabular file
+
+  HLA alleles either entered as text or one per line in a text file
+
+
+**OUTPUTS**
+  
+  A tabular file containing the results returned from the IEDB web service
+
+    ]]></help>
+    <citations>
+       <citation type="doi">10.1093/nar/gku938</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alleles.tsv	Mon Sep 28 11:27:57 2015 -0400
@@ -0,0 +1,2 @@
+HLA-A*01:01,9
+HLA-A*03:01,10
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seqs.fa	Mon Sep 28 11:27:57 2015 -0400
@@ -0,0 +1,4 @@
+>peptide1
+AGHAHKVPRRLLKAAR
+>peptide2
+ALKAADASADADGSGSGSGSG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seqs.tsv	Mon Sep 28 11:27:57 2015 -0400
@@ -0,0 +1,2 @@
+peptide1	16	AGHAHKVPRRLLKAAR
+peptide2	21	ALKAADASADADGSGSGSGSG