Mercurial > repos > jankanis > blast2html2
comparison visualise.py @ 7:9e7927673089
intermediate commit before converting some tables to divs
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Thu, 08 May 2014 16:51:52 +0200 |
parents | 1df2bfce5c24 |
children | 2fbdf2eb27b4 |
comparison
equal
deleted
inserted
replaced
6:d20ce91e1297 | 7:9e7927673089 |
---|---|
2 | 2 |
3 # Copyright The Hyve B.V. 2014 | 3 # Copyright The Hyve B.V. 2014 |
4 # License: GPL version 3 or higher | 4 # License: GPL version 3 or higher |
5 | 5 |
6 import sys | 6 import sys |
7 import math | |
7 import warnings | 8 import warnings |
8 from itertools import repeat | 9 from itertools import repeat |
9 from lxml import objectify | 10 from lxml import objectify |
10 import jinja2 | 11 import jinja2 |
11 | 12 |
24 | 25 |
25 colors = ['black', 'blue', 'green', 'magenta', 'red'] | 26 colors = ['black', 'blue', 'green', 'magenta', 'red'] |
26 | 27 |
27 blast = objectify.parse('blast xml example1.xml').getroot() | 28 blast = objectify.parse('blast xml example1.xml').getroot() |
28 loader = jinja2.FileSystemLoader(searchpath='.') | 29 loader = jinja2.FileSystemLoader(searchpath='.') |
29 environment = jinja2.Environment(loader=loader) | 30 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) |
30 environment.filters['color'] = lambda length: match_colors[color_idx(length)] | 31 environment.filters['color'] = lambda length: match_colors[color_idx(length)] |
32 | |
33 query_length = int(blast["BlastOutput_query-len"]) | |
34 | |
35 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit | |
36 # sort hits by longest hotspot first | |
37 ordered_hits = sorted(hits, | |
38 key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), | |
39 reverse=True) | |
31 | 40 |
32 def match_colors(): | 41 def match_colors(): |
33 """ | 42 """ |
34 An iterator that yields lists of length-color pairs. | 43 An iterator that yields lists of length-color pairs. |
35 """ | 44 """ |
45 | |
46 percent_multiplier = 100 / query_length | |
36 | 47 |
37 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit | |
38 query_length = blast["BlastOutput_query-len"] | |
39 # sort hits by longest hotspot first | |
40 hits = sorted(hits, key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), reverse=True) | |
41 | |
42 for hit in hits: | 48 for hit in hits: |
43 # sort hotspots from short to long, so we can overwrite index colors of | 49 # sort hotspots from short to long, so we can overwrite index colors of |
44 # short matches with those of long ones. | 50 # short matches with those of long ones. |
45 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) | 51 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) |
46 table = bytearray([255]) * query_length | 52 table = bytearray([255]) * query_length |
47 for hsp in hotspots: | 53 for hsp in hotspots: |
48 frm = hsp['Hsp_query-from'] - 1 | 54 frm = hsp['Hsp_query-from'] - 1 |
49 to = hsp['Hsp_query-to'] - 1 | 55 to = int(hsp['Hsp_query-to']) |
50 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) | 56 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) |
51 | 57 |
52 matches = [] | 58 matches = [] |
53 last = table[0] | 59 last = table[0] |
54 count = 0 | 60 count = 0 |
55 for i in range(int(query_length)): | 61 for i in range(query_length): |
56 if table[i] == last: | 62 if table[i] == last: |
57 count += 1 | 63 count += 1 |
58 continue | 64 continue |
59 matches.append((count, colors[last] if last != 255 else 'none')) | 65 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
60 last = table[i] | 66 last = table[i] |
61 count = 1 | 67 count = 1 |
62 matches.append((count, colors[last] if last != 255 else 'none')) | 68 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
63 | 69 |
64 yield dict(colors=matches, link="#hit"+hit.Hit_num.text) | 70 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def) |
65 | 71 |
66 | 72 |
73 def queryscale(): | |
74 max_labels = 10 | |
75 skip = math.ceil(query_length / max_labels) | |
76 percent_multiplier = 100 / query_length | |
77 for i in range(1, query_length+1): | |
78 if i % skip == 0: | |
79 yield dict(label = i, width = skip * percent_multiplier) | |
80 if query_length % skip != 0: | |
81 yield dict(label = query_length, width = (query_length % skip) * percent_multiplier) | |
82 | |
83 | |
84 def hit_info(): | |
85 | |
86 for hit in ordered_hits: | |
87 hsps = hit.Hit_hsps.Hsp | |
88 | |
89 cover = [False] * query_length | |
90 for hsp in hsps: | |
91 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len'])) | |
92 cover_count = cover.count(True) | |
93 | |
94 def hsp_val(path): | |
95 return (hsp[path] for hsp in hsps) | |
96 | |
97 yield dict(description = hit.Hit_def, | |
98 maxscore = max(hsp_val('Hsp_bit-score')), | |
99 totalscore = sum(hsp_val('Hsp_bit-score')), | |
100 cover = "{:.0%}".format(cover_count / query_length), | |
101 e_value = min(hsp_val('Hsp_evalue')), | |
102 # FIXME: is this the correct formula vv? | |
103 ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)), | |
104 accession = hit.Hit_accession) | |
105 | |
106 | |
67 def main(): | 107 def main(): |
68 template = environment.get_template('visualise.html.jinja') | 108 template = environment.get_template('visualise.html.jinja') |
69 | 109 |
70 params = (('Query ID', blast["BlastOutput_query-ID"]), | 110 params = (('Query ID', blast["BlastOutput_query-ID"]), |
71 ('Query definition', blast["BlastOutput_query-def"]), | 111 ('Query definition', blast["BlastOutput_query-def"]), |
76 | 116 |
77 if len(blast.BlastOutput_iterations.Iteration) > 1: | 117 if len(blast.BlastOutput_iterations.Iteration) > 1: |
78 warnings.warn("Multiple 'Iteration' elements found, showing only the first") | 118 warnings.warn("Multiple 'Iteration' elements found, showing only the first") |
79 | 119 |
80 sys.stdout.write(template.render(blast=blast, | 120 sys.stdout.write(template.render(blast=blast, |
81 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, | 121 length=query_length, |
122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, | |
82 colors=colors, | 123 colors=colors, |
83 match_colors=match_colors(), | 124 match_colors=match_colors(), |
125 queryscale=queryscale(), | |
126 hit_info=hit_info(), | |
84 params=params)) | 127 params=params)) |
85 | 128 |
86 main() | 129 main() |