Mercurial > repos > jankanis > blast2html
changeset 98:4378d11f0ed7 draft
implement configurable gene bank links
| author | Jan Kanis <jan.code@jankanis.nl> | 
|---|---|
| date | Mon, 30 Jun 2014 16:49:45 +0200 | 
| parents | df9fd5f35967 | 
| children | 02b795b784e1 | 
| files | blast2html.html.jinja blast2html.py | 
| diffstat | 2 files changed, 96 insertions(+), 39 deletions(-) [+] | 
line wrap: on
 line diff
--- a/blast2html.html.jinja Tue Jun 24 18:51:26 2014 +0200 +++ b/blast2html.html.jinja Mon Jun 30 16:49:45 2014 +0200 @@ -532,7 +532,7 @@ <td>{{hit.cover}}</td> <td>{{hit.e_value}}</td> <td>{{hit.ident}}</td> - <td><a href="{{genelink(hit.hit|hitid)}}">{{hit.accession}}</a></td> + <td>{{hit.hit|genelink(hit.hit.Hit_accession)}}</td> </tr> {% endfor %} </table> @@ -551,14 +551,13 @@ <div class=linkheader> <div class=right><a href="#description{{hit|nodeid}}">Descriptions</a></div> - <a class=linkheader href="{{genelink(hit|hitid)}}">GenBank</a> - <a class=linkheader href="{{genelink(hit|hitid, 'graph')}}">Graphics</a> + {{ hit|genelink('Gene Bank', clas='linkheader', display_nolink=False) }} </div> <div class=title> <p class=hittitle>{{hit|firsttitle}}</p> <p class=titleinfo> - <span class=b>Sequence ID:</span> <a href="{{genelink(hit|hitid)}}">{{hit|seqid}}</a> + <span class=b>Sequence ID:</span> {{ hit|genelink }} <span class=b>Length:</span> {{hit.Hit_len}} <span class=b>Number of Matches:</span> {{hit.Hit_hsps.Hsp|length}} </p> @@ -574,7 +573,7 @@ <div class=title> <p class=hittitle>{{title.title}}</p> <p class=titleinfo> - <span class=b>Sequence ID:</span> <a href="{{genelink(title.hitid)}}">{{title.id}}</a> + <span class=b>Sequence ID:</span> {{ title|genelink }} </p> </div> {% endfor %} @@ -585,8 +584,6 @@ <div class=hotspot id=hotspot{{hsp|nodeid}}> <p class=range> <span class=range>Range {{hsp.Hsp_num}}: {{hsp['Hsp_hit-from']}} to {{hsp['Hsp_hit-to']}}</span> - <a class=range href="{{genelink(hit|hitid, 'genbank', hsp)}}">GenBank</a> - <a class=range href="{{genelink(hit|hitid, 'graph', hsp)}}">Graphics</a> </p> <table class=hotspotstable>
--- a/blast2html.py Tue Jun 24 18:51:26 2014 +0200 +++ b/blast2html.py Mon Jun 30 16:49:45 2014 +0200 @@ -15,10 +15,14 @@ from six.moves import builtins from os import path from itertools import repeat +from collections import defaultdict import argparse from lxml import objectify import jinja2 +builtin_str = str +str = six.text_type + _filters = dict(float='float') @@ -75,29 +79,16 @@ titles = [] for t in id_titles[1:]: - fullid, title = t.split(' ', 1) - hitid, id = fullid.split('|', 2)[1:3] - titles.append(dict(id = id, - hitid = hitid, - fullid = fullid, - title = title)) + id, title = t.split(' ', 1) + titles.append(argparse.Namespace(Hit_id = id, + Hit_def = title, + Hit_accession = '', + getroottree = hit.getroottree)) return titles @filter def hitid(hit): - hitid = hit.Hit_id.text - s = hitid.split('|', 2) - if len(s) >= 2: - return s[1] - return hitid - -@filter -def seqid(hit): - hitid = hit.Hit_id.text - s = hitid.split('|', 2) - if len(s) >= 3: - return s[2] - return hitid + return str(hit.Hit_id) @filter @@ -177,13 +168,13 @@ return 'Minus' raise Exception("frame should be either +1 or -1") -def genelink(hit, type='genbank', hsp=None): - if not isinstance(hit, six.string_types): - hit = hitid(hit) - link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type) - if hsp != None: - link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to']) - return link +# def genelink(hit, type='genbank', hsp=None): +# if not isinstance(hit, six.string_types): +# hit = hitid(hit) +# link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type) +# if hsp != None: +# link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to']) +# return link # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139 @@ -218,7 +209,7 @@ javascript snippets. """ - value = six.text_type(value) + value = str(value) for bad, good in _js_escapes: value = value.replace(bad, good) @@ -240,9 +231,10 @@ max_scale_labels = 10 - def __init__(self, input, templatedir, templatename): + def __init__(self, input, templatedir, templatename, genelinks={}): self.input = input self.templatename = templatename + self.genelinks = genelinks self.blast = objectify.parse(self.input).getroot() self.loader = jinja2.FileSystemLoader(searchpath=templatedir) @@ -275,7 +267,6 @@ result = template.render(blast=self.blast, iterations=self.blast.BlastOutput_iterations.Iteration, colors=self.colors, - genelink=genelink, params=params) if six.PY2: result = result.encode('utf-8') @@ -351,8 +342,52 @@ e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))), # FIXME: is this the correct formula vv? # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 - ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))), - accession = hit.Hit_accession) + ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) + + @filter + def genelink(self, hit, text=None, clas=None, display_nolink=True): + if text is None: + text = hitid(hit) + db = hit.getroottree().getroot().BlastOutput_db + if isinstance(self.genelinks, six.string_types): + template = self.genelinks + else: + template = self.genelinks.get(db) + if template is None: + return text if display_nolink else '' + args = dict(id=hitid(hit).split('|'), + fullid=hitid(hit), + defline=str(hit.Hit_def).split('|'), + fulldefline=str(hit.Hit_def), + accession=str(hit.Hit_accession)) + try: + link = template.format(**args) + except Exception as e: + warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) + return text if display_nolink else '' + classattr = 'class="{}" '.format(jinja2.escape(clas)) if clas is not None else '' + return jinja2.Markup("<a {}href=\"{}\">{}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) + + +def read_genelinks(dir): + links = {} + for f in ('blastdb.loc', 'blastdb_p.loc', 'blastdb_d.loc'): + try: + f = open(path.join(dir, f)) + for l in f.readlines(): + if l.strip().startswith('#'): + continue + line = l.split('\t') + try: + links[line[2]] = line[3] + except IndexError: + continue + f.close() + except OSError: + continue + if not links: + warnings.warn("No gene bank link templates found") + return links def main(): @@ -374,7 +409,25 @@ # care too much. parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, help='The template file to use. Defaults to blast_html.html.jinja') + + dblink_group = parser.add_mutually_exclusive_group() + dblink_group.add_argument('--genelink-template', default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', + help="""A link template to link hits to a gene bank webpage. The template string is a + Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, + {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be + replaced by the Nth element of the id or defline, where '|' is the field separator. + The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', + which is a link to the NCBI nucleotide database.""") + + dblink_group.add_argument('--db-config-dir', + help="""The directory where databases are configured in blastdb*.loc files. These files + are consulted for creating a gene bank link. The files should be tab-separated tables (with lines + starting with '#' ignored), where the third field of a line should be a database path and the fourth + a genebank link template conforming to the --genelink-template option syntax. + + This option is incompatible with --genelink-template.""") + args = parser.parse_args() if args.input == None: args.input = args.positional_arg @@ -386,7 +439,14 @@ if not templatedir: templatedir = '.' - b = BlastVisualize(args.input, templatedir, templatename) + if args.db_config_dir is None: + genelinks = args.genelink_template + elif not path.isdir(args.db_config_dir): + parser.error('db-config-dir does not exist or is not a directory') + else: + genelinks = read_genelinks(args.db_config_dir) + + b = BlastVisualize(args.input, templatedir, templatename, genelinks) b.render(args.output)
