Mercurial > repos > jankanis > blast2html2
comparison visualise.py @ 7:9e7927673089
intermediate commit before converting some tables to divs
| author | Jan Kanis <jan.code@jankanis.nl> | 
|---|---|
| date | Thu, 08 May 2014 16:51:52 +0200 | 
| parents | 1df2bfce5c24 | 
| children | 2fbdf2eb27b4 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 6:d20ce91e1297 | 7:9e7927673089 | 
|---|---|
| 2 | 2 | 
| 3 # Copyright The Hyve B.V. 2014 | 3 # Copyright The Hyve B.V. 2014 | 
| 4 # License: GPL version 3 or higher | 4 # License: GPL version 3 or higher | 
| 5 | 5 | 
| 6 import sys | 6 import sys | 
| | 7 import math |
| 7 import warnings | 8 import warnings | 
| 8 from itertools import repeat | 9 from itertools import repeat | 
| 9 from lxml import objectify | 10 from lxml import objectify | 
| 10 import jinja2 | 11 import jinja2 | 
| 11 | 12 | 
| 24 | 25 | 
| 25 colors = ['black', 'blue', 'green', 'magenta', 'red'] | 26 colors = ['black', 'blue', 'green', 'magenta', 'red'] | 
| 26 | 27 | 
| 27 blast = objectify.parse('blast xml example1.xml').getroot() | 28 blast = objectify.parse('blast xml example1.xml').getroot() | 
| 28 loader = jinja2.FileSystemLoader(searchpath='.') | 29 loader = jinja2.FileSystemLoader(searchpath='.') | 
| 29 environment = jinja2.Environment(loader=loader) | 30 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) | 
| 30 environment.filters['color'] = lambda length: match_colors[color_idx(length)] | 31 environment.filters['color'] = lambda length: match_colors[color_idx(length)] | 
| | 32 |
| | 33 query_length = int(blast["BlastOutput_query-len"]) |
| | 34 |
| | 35 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit |
| | 36 # sort hits by longest hotspot first |
| | 37 ordered_hits = sorted(hits, |
| | 38 key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), |
| | 39 reverse=True) |
| 31 | 40 | 
| 32 def match_colors(): | 41 def match_colors(): | 
| 33 """ | 42 """ | 
| 34 An iterator that yields lists of length-color pairs. | 43 An iterator that yields lists of length-color pairs. | 
| 35 """ | 44 """ | 
| | 45 |
| | 46 percent_multiplier = 100 / query_length |
| 36 | 47 | 
| 37 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit | |
| 38 query_length = blast["BlastOutput_query-len"] | |
| 39 # sort hits by longest hotspot first | |
| 40 hits = sorted(hits, key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), reverse=True) | |
| 41 | |
| 42 for hit in hits: | 48 for hit in hits: | 
| 43 # sort hotspots from short to long, so we can overwrite index colors of | 49 # sort hotspots from short to long, so we can overwrite index colors of | 
| 44 # short matches with those of long ones. | 50 # short matches with those of long ones. | 
| 45 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) | 51 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) | 
| 46 table = bytearray([255]) * query_length | 52 table = bytearray([255]) * query_length | 
| 47 for hsp in hotspots: | 53 for hsp in hotspots: | 
| 48 frm = hsp['Hsp_query-from'] - 1 | 54 frm = hsp['Hsp_query-from'] - 1 | 
| 49 to = hsp['Hsp_query-to'] - 1 | 55 to = int(hsp['Hsp_query-to']) | 
| 50 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) | 56 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) | 
| 51 | 57 | 
| 52 matches = [] | 58 matches = [] | 
| 53 last = table[0] | 59 last = table[0] | 
| 54 count = 0 | 60 count = 0 | 
| 55 for i in range(int(query_length)): | 61 for i in range(query_length): | 
| 56 if table[i] == last: | 62 if table[i] == last: | 
| 57 count += 1 | 63 count += 1 | 
| 58 continue | 64 continue | 
| 59 matches.append((count, colors[last] if last != 255 else 'none')) | 65 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 
| 60 last = table[i] | 66 last = table[i] | 
| 61 count = 1 | 67 count = 1 | 
| 62 matches.append((count, colors[last] if last != 255 else 'none')) | 68 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 
| 63 | 69 | 
| 64 yield dict(colors=matches, link="#hit"+hit.Hit_num.text) | 70 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def) | 
| 65 | 71 | 
| 66 | 72 | 
| | 73 def queryscale(): |
| | 74 max_labels = 10 |
| | 75 skip = math.ceil(query_length / max_labels) |
| | 76 percent_multiplier = 100 / query_length |
| | 77 for i in range(1, query_length+1): |
| | 78 if i % skip == 0: |
| | 79 yield dict(label = i, width = skip * percent_multiplier) |
| | 80 if query_length % skip != 0: |
| | 81 yield dict(label = query_length, width = (query_length % skip) * percent_multiplier) |
| | 82 |
| | 83 |
| | 84 def hit_info(): |
| | 85 |
| | 86 for hit in ordered_hits: |
| | 87 hsps = hit.Hit_hsps.Hsp |
| | 88 |
| | 89 cover = [False] * query_length |
| | 90 for hsp in hsps: |
| | 91 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len'])) |
| | 92 cover_count = cover.count(True) |
| | 93 |
| | 94 def hsp_val(path): |
| | 95 return (hsp[path] for hsp in hsps) |
| | 96 |
| | 97 yield dict(description = hit.Hit_def, |
| | 98 maxscore = max(hsp_val('Hsp_bit-score')), |
| | 99 totalscore = sum(hsp_val('Hsp_bit-score')), |
| | 100 cover = "{:.0%}".format(cover_count / query_length), |
| | 101 e_value = min(hsp_val('Hsp_evalue')), |
| | 102 # FIXME: is this the correct formula vv? |
| | 103 ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)), |
| | 104 accession = hit.Hit_accession) |
| | 105 |
| | 106 |
| 67 def main(): | 107 def main(): | 
| 68 template = environment.get_template('visualise.html.jinja') | 108 template = environment.get_template('visualise.html.jinja') | 
| 69 | 109 | 
| 70 params = (('Query ID', blast["BlastOutput_query-ID"]), | 110 params = (('Query ID', blast["BlastOutput_query-ID"]), | 
| 71 ('Query definition', blast["BlastOutput_query-def"]), | 111 ('Query definition', blast["BlastOutput_query-def"]), | 
| 76 | 116 | 
| 77 if len(blast.BlastOutput_iterations.Iteration) > 1: | 117 if len(blast.BlastOutput_iterations.Iteration) > 1: | 
| 78 warnings.warn("Multiple 'Iteration' elements found, showing only the first") | 118 warnings.warn("Multiple 'Iteration' elements found, showing only the first") | 
| 79 | 119 | 
| 80 sys.stdout.write(template.render(blast=blast, | 120 sys.stdout.write(template.render(blast=blast, | 
| 81 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, | 121 length=query_length, | 
| | 122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, |
| 82 colors=colors, | 123 colors=colors, | 
| 83 match_colors=match_colors(), | 124 match_colors=match_colors(), | 
| | 125 queryscale=queryscale(), |
| | 126 hit_info=hit_info(), |
| 84 params=params)) | 127 params=params)) | 
| 85 | 128 | 
| 86 main() | 129 main() | 
