blast2html2: blast_html.py comparison

comparison blast_html.py @ 19:67ddcb807b7d

make it work with multiple queries

author	Jan Kanis <jan.code@jankanis.nl>
date	Tue, 13 May 2014 18:06:36 +0200
parents	4434ffab721a
children	53cd304c5f26

comparison

Legend: equal (unmarked) · deleted (-) · inserted (+) · replaced (- then +)

-:4434ffab721a
+:67ddcb807b7d
 _filters = {}
 def filter(func_or_name):
 "Decorator to register a function as filter in the current jinja environment"
 if isinstance(func_or_name, str):
 def inner(func):
-_filters[func_or_name] = func
+_filters[func_or_name] = func.__name__
 return func
 return inner
 else:
-_filters[func_or_name.__name__] = func_or_name
+_filters[func_or_name.__name__] = func_or_name.__name__
 return func_or_name
 def color_idx(length):
 if length < 40:
 "       {:7s}  {}\n".format('', hsp.Hsp_midline) +
 "Subject{:>7s}  {}  {}".format(hsp['Hsp_hit-from'].text, hsp.Hsp_hseq, hsp['Hsp_hit-to'])
 )
 @filter('len')
-def hsplen(node):
+def blastxml_len(node):
-return int(node['Hsp_align-len'])
+if node.tag == 'Hsp':
+return int(node['Hsp_align-len'])
+elif node.tag == 'Iteration':
+return int(node['Iteration_query-len'])
+raise Exception("Unknown XML node type: "+node.tag)
 @filter
 def asframe(frame):
 if frame == 1:
 return 'Plus'
 for bad, good in _js_escapes:
 value = value.replace(bad, good)
 return value
+@filter
+def hits(result):
+# sort hits by longest hotspot first
+return sorted(result.Iteration_hits.findall('Hit'),
+key=lambda h: max(blastxml_len(hsp) for hsp in h.Hit_hsps.Hsp),
+reverse=True)
 class BlastVisualize:
 colors = ('black', 'blue', 'green', 'magenta', 'red')
 self.blast = objectify.parse(self.input).getroot()
 self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
 self.environment = jinja2.Environment(loader=self.loader,
 lstrip_blocks=True, trim_blocks=True, autoescape=True)
-self.environment.filters.update(_filters)
+self._addfilters(self.environment)
-self.environment.filters['color'] = lambda length: match_colors[color_idx(length)]
-self.query_length = int(self.blast["BlastOutput_query-len"])
+def _addfilters(self, environment):
-self.hits = self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
+for filtername, funcname in _filters.items():
-# sort hits by longest hotspot first
+try:
-self.ordered_hits = sorted(self.hits,
+environment.filters[filtername] = getattr(self, funcname)
-key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp),
+except AttributeError:
-reverse=True)
+environment.filters[filtername] = globals()[funcname]
 def render(self, output):
 template = self.environment.get_template(self.templatename)
 params = (('Query ID', self.blast["BlastOutput_query-ID"]),
 ('Query length', self.blast["BlastOutput_query-len"]),
 ('Program', self.blast.BlastOutput_version),
 ('Database', self.blast.BlastOutput_db),
 )
-if len(self.blast.BlastOutput_iterations.Iteration) > 1:
-warnings.warn("Multiple 'Iteration' elements found, showing only the first")
 output.write(template.render(blast=self.blast,
-length=self.query_length,
+iterations=self.blast.BlastOutput_iterations.Iteration,
-hits=self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
 colors=self.colors,
-match_colors=self.match_colors(),
+# match_colors=self.match_colors(),
-queryscale=self.queryscale(),
+# hit_info=self.hit_info(),
-hit_info=self.hit_info(),
 genelink=genelink,
 params=params))
+@filter
-def match_colors(self):
+def match_colors(self, result):
 """
 An iterator that yields lists of length-color pairs.
 """
-percent_multiplier = 100 / self.query_length
+query_length = blastxml_len(result)
-for hit in self.hits:
+percent_multiplier = 100 / query_length
+for hit in hits(result):
 # sort hotspots from short to long, so we can overwrite index colors of
 # short matches with those of long ones.
-hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp))
+hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: blastxml_len(hsp))
-table = bytearray([255]) * self.query_length
+table = bytearray([255]) * query_length
 for hsp in hotspots:
 frm = hsp['Hsp_query-from'] - 1
 to = int(hsp['Hsp_query-to'])
-table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm)
+table[frm:to] = repeat(color_idx(blastxml_len(hsp)), to - frm)
 matches = []
 last = table[0]
 count = 0
-for i in range(self.query_length):
+for i in range(query_length):
 if table[i] == last:
 count += 1
 continue
 matches.append((count * percent_multiplier, self.colors[last] if last != 255 else 'transparent'))
 last = table[i]
 count = 1
 matches.append((count * percent_multiplier, self.colors[last] if last != 255 else 'transparent'))
 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit))
+@filter
-def queryscale(self):
+def queryscale(self, result):
-skip = math.ceil(self.query_length / self.max_scale_labels)
+query_length = blastxml_len(result)
-percent_multiplier = 100 / self.query_length
+skip = math.ceil(query_length / self.max_scale_labels)
-for i in range(1, self.query_length+1):
+percent_multiplier = 100 / query_length
+for i in range(1, query_length+1):
 if i % skip == 0:
 yield dict(label = i, width = skip * percent_multiplier)
-if self.query_length % skip != 0:
+if query_length % skip != 0:
-yield dict(label = self.query_length, width = (self.query_length % skip) * percent_multiplier)
+yield dict(label = query_length, width = (query_length % skip) * percent_multiplier)
+@filter
-def hit_info(self):
+def hit_info(self, result):
-for hit in self.ordered_hits:
+query_length = blastxml_len(result)
+for hit in hits(result):
 hsps = hit.Hit_hsps.Hsp
-cover = [False] * self.query_length
+cover = [False] * query_length
 for hsp in hsps:
-cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp))
+cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp))
 cover_count = cover.count(True)
 def hsp_val(path):
 return (float(hsp[path]) for hsp in hsps)
 yield dict(hit = hit,
 title = firsttitle(hit),
 link_id = hit.Hit_num,
 maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))),
 totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
-cover = "{:.0%}".format(cover_count / self.query_length),
+cover = "{:.0%}".format(cover_count / query_length),
 e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))),
 # FIXME: is this the correct formula vv?
-ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))),
+ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))),
 accession = hit.Hit_accession)
 def main():
 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",

Mercurial > repos > jankanis > blast2html2

comparison blast_html.py @ 19:67ddcb807b7d