changeset 98:4378d11f0ed7 draft

implement configurable gene bank links
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 30 Jun 2014 16:49:45 +0200
parents df9fd5f35967
children 02b795b784e1
files blast2html.html.jinja blast2html.py
diffstat 2 files changed, 96 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/blast2html.html.jinja	Tue Jun 24 18:51:26 2014 +0200
+++ b/blast2html.html.jinja	Mon Jun 30 16:49:45 2014 +0200
@@ -532,7 +532,7 @@
                   <td>{{hit.cover}}</td>
                   <td>{{hit.e_value}}</td>
                   <td>{{hit.ident}}</td>
-                  <td><a href="{{genelink(hit.hit|hitid)}}">{{hit.accession}}</a></td>
+                  <td>{{hit.hit|genelink(hit.hit.Hit_accession)}}</td>
                 </tr>
                 {% endfor %}
               </table>
@@ -551,14 +551,13 @@
 
                 <div class=linkheader>
                   <div class=right><a href="#description{{hit|nodeid}}">Descriptions</a></div>
-                  <a class=linkheader href="{{genelink(hit|hitid)}}">GenBank</a>
-                  <a class=linkheader href="{{genelink(hit|hitid, 'graph')}}">Graphics</a>
+                  {{ hit|genelink('Gene Bank', clas='linkheader', display_nolink=False) }}
                 </div>
 
                 <div class=title>
                   <p class=hittitle>{{hit|firsttitle}}</p>
                   <p class=titleinfo>
-                    <span class=b>Sequence ID:</span> <a href="{{genelink(hit|hitid)}}">{{hit|seqid}}</a>
+                    <span class=b>Sequence ID:</span> {{ hit|genelink }}
                     <span class=b>Length:</span> {{hit.Hit_len}}
                     <span class=b>Number of Matches:</span> {{hit.Hit_hsps.Hsp|length}}
                   </p>
@@ -574,7 +573,7 @@
                   <div class=title>
                     <p class=hittitle>{{title.title}}</p>
                     <p class=titleinfo>
-                      <span class=b>Sequence ID:</span> <a href="{{genelink(title.hitid)}}">{{title.id}}</a>
+                      <span class=b>Sequence ID:</span> {{ title|genelink }}
                     </p>
                   </div>
                   {% endfor %}
@@ -585,8 +584,6 @@
                 <div class=hotspot id=hotspot{{hsp|nodeid}}>
                   <p class=range>
                     <span class=range>Range {{hsp.Hsp_num}}: {{hsp['Hsp_hit-from']}} to {{hsp['Hsp_hit-to']}}</span>
-                    <a class=range href="{{genelink(hit|hitid, 'genbank', hsp)}}">GenBank</a>
-                    <a class=range href="{{genelink(hit|hitid, 'graph', hsp)}}">Graphics</a>
                   </p>
 
                   <table class=hotspotstable>
--- a/blast2html.py	Tue Jun 24 18:51:26 2014 +0200
+++ b/blast2html.py	Mon Jun 30 16:49:45 2014 +0200
@@ -15,10 +15,14 @@
 from six.moves import builtins
 from os import path
 from itertools import repeat
+from collections import defaultdict
 import argparse
 from lxml import objectify
 import jinja2
 
+builtin_str = str
+str = six.text_type
+
 
 
 _filters = dict(float='float')
@@ -75,29 +79,16 @@
 
     titles = []
     for t in id_titles[1:]:
-        fullid, title = t.split(' ', 1)
-        hitid, id = fullid.split('|', 2)[1:3]
-        titles.append(dict(id = id,
-                           hitid = hitid,
-                           fullid = fullid,
-                           title = title))
+        id, title = t.split(' ', 1)
+        titles.append(argparse.Namespace(Hit_id = id,
+                                         Hit_def = title,
+                                         Hit_accession = '',
+                                         getroottree = hit.getroottree))
     return titles
 
 @filter
 def hitid(hit):
-    hitid = hit.Hit_id.text
-    s = hitid.split('|', 2)
-    if len(s) >= 2:
-        return s[1]
-    return hitid
-
-@filter
-def seqid(hit):
-    hitid = hit.Hit_id.text
-    s = hitid.split('|', 2)
-    if len(s) >= 3:
-        return s[2]
-    return hitid
+    return str(hit.Hit_id)
 
 
 @filter
@@ -177,13 +168,13 @@
         return 'Minus'
     raise Exception("frame should be either +1 or -1")
 
-def genelink(hit, type='genbank', hsp=None):
-    if not isinstance(hit, six.string_types):
-        hit = hitid(hit)
-    link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
-    if hsp != None:
-        link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
-    return link
+# def genelink(hit, type='genbank', hsp=None):
+#     if not isinstance(hit, six.string_types):
+#         hit = hitid(hit)
+#     link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
+#     if hsp != None:
+#         link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
+#     return link
 
 
 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
@@ -218,7 +209,7 @@
     javascript snippets.
     """
 
-    value = six.text_type(value)
+    value = str(value)
 
     for bad, good in _js_escapes:
         value = value.replace(bad, good)
@@ -240,9 +231,10 @@
 
     max_scale_labels = 10
 
-    def __init__(self, input, templatedir, templatename):
+    def __init__(self, input, templatedir, templatename, genelinks={}):
         self.input = input
         self.templatename = templatename
+        self.genelinks = genelinks
 
         self.blast = objectify.parse(self.input).getroot()
         self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
@@ -275,7 +267,6 @@
         result = template.render(blast=self.blast,
                                  iterations=self.blast.BlastOutput_iterations.Iteration,
                                  colors=self.colors,
-                                 genelink=genelink,
                                  params=params)
         if six.PY2:
             result = result.encode('utf-8')
@@ -351,8 +342,52 @@
                        e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))),
                        # FIXME: is this the correct formula vv?
                        # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
-                       ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))),
-                       accession = hit.Hit_accession)
+                       ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))))
+
+    @filter
+    def genelink(self, hit, text=None, clas=None, display_nolink=True):
+        """Return an HTML link to the gene bank page of hit, labelled with text.
+
+        text defaults to the hit id; clas is an optional CSS class for the <a> tag. If no link
+        template applies or formatting it fails, return text (or '' if not display_nolink)."""
+        text = hitid(hit) if text is None else text
+        nolink = text if display_nolink else ''
+        db = hit.getroottree().getroot().BlastOutput_db
+        template = self.genelinks if isinstance(self.genelinks, six.string_types) else self.genelinks.get(db)
+        if template is None:
+            return nolink
+        fullid, defline = hitid(hit), str(hit.Hit_def)
+        args = dict(id=fullid.split('|'), fullid=fullid, defline=defline.split('|'),
+                    fulldefline=defline, accession=str(hit.Hit_accession))
+        try:
+            link = template.format(**args)
+        except Exception as e:
+            # Explicit field indices: auto-numbered '{}' fields raise ValueError on Python 2.6.
+            warnings.warn('Error in formatting gene bank link {0} with {1}: {2}'.format(template, args, e))
+            return nolink
+        classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else ''
+        return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
+
+
+def read_genelinks(dir):
+    """Read gene bank link templates from blastdb.loc, blastdb_p.loc and blastdb_d.loc in dir.
+
+    Return a dict mapping the third tab-separated field of each line (database path) to the
+    fourth (link template); '#' lines, short lines and unreadable files are skipped."""
+    links = {}
+    for name in ('blastdb.loc', 'blastdb_p.loc', 'blastdb_d.loc'):
+        try:
+            with open(path.join(dir, name)) as locfile:
+                for line in locfile:
+                    # drop the line ending so it does not end up inside the link template
+                    fields = line.rstrip('\r\n').split('\t')
+                    if not line.strip().startswith('#') and len(fields) > 3:
+                        links[fields[2]] = fields[3]
+        except (IOError, OSError):  # open() raises IOError, not OSError, on Python 2
+            continue
+    if not links:
+        warnings.warn("No gene bank link templates found")
+    return links
 
 
 def main():
@@ -374,7 +409,25 @@
     # care too much.
     parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                         help='The template file to use. Defaults to blast_html.html.jinja')
+    
+    dblink_group = parser.add_mutually_exclusive_group()
+    dblink_group.add_argument('--genelink-template', default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                              help="""A link template to link hits to a gene bank webpage. The template string is a 
+                              Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 
+                              {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 
+                              replaced by the Nth element of the id or defline, where '|' is the field separator. 
 
+                              The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+                              which is a link to the NCBI nucleotide database.""")
+    
+    dblink_group.add_argument('--db-config-dir',
+                              help="""The directory where databases are configured in blastdb*.loc files. These files
+                              are consulted for creating a gene bank link. The files should be tab-separated tables (with lines
+                              starting with '#' ignored), where the third field of a line should be a database path and the fourth
+                              a genebank link template conforming to the --genelink-template option syntax.
+
+                              This option is incompatible with --genelink-template.""")
+    
     args = parser.parse_args()
     if args.input == None:
         args.input = args.positional_arg
@@ -386,7 +439,14 @@
     if not templatedir:
         templatedir = '.'
 
-    b = BlastVisualize(args.input, templatedir, templatename)
+    if args.db_config_dir is None:
+        genelinks = args.genelink_template
+    elif not path.isdir(args.db_config_dir):
+        parser.error('db-config-dir does not exist or is not a directory')
+    else:
+        genelinks = read_genelinks(args.db_config_dir)
+
+    b = BlastVisualize(args.input, templatedir, templatename, genelinks)
     b.render(args.output)