Mercurial > repos > jankanis > blast2html2
annotate visualise.py @ 7:9e7927673089
intermediate commit before converting some tables to divs
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Thu, 08 May 2014 16:51:52 +0200 |
parents | 1df2bfce5c24 |
children | 2fbdf2eb27b4 |
rev | line source |
---|---|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
2 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
3 # Copyright The Hyve B.V. 2014 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
4 # License: GPL version 3 or higher |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
5 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
6 import sys |
7
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
7 import math |
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
8 import warnings |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
9 from itertools import repeat |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
10 from lxml import objectify |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
11 import jinja2 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
12 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
13 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def color_idx(length):
    """Return the colour bucket (0-4) for an alignment of the given length.

    The buckets are delimited by the thresholds 40, 50, 80 and 200:
    lengths below 40 map to 0, below 50 to 1, below 80 to 2, below 200
    to 3, and everything from 200 upwards to 4.
    """
    upper_bounds = (40, 50, 80, 200)
    for bucket, bound in enumerate(upper_bounds):
        if length < bound:
            return bucket
    # Not below any threshold: last bucket.
    return len(upper_bounds)
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
24 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
25 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
# Colour names, indexed by the bucket number that color_idx() returns.
colors = ['black', 'blue', 'green', 'magenta', 'red']

# NOTE: the input file name and the template search path are hard-coded.
blast = objectify.parse('blast xml example1.xml').getroot()
loader = jinja2.FileSystemLoader(searchpath='.')
environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True)
# Template filter that turns an alignment length into a colour name.
# It has to index the `colors` list: `match_colors` is a generator function
# and subscripting it would raise a TypeError as soon as the filter is used.
environment.filters['color'] = lambda length: colors[color_idx(length)]

query_length = int(blast["BlastOutput_query-len"])

# Attribute access on an objectified element resolves to the first matching
# child, so only the hits of the first Iteration are used.
hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
# Sort hits so that the hit owning the longest HSP comes first.
ordered_hits = sorted(hits,
                      key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp),
                      reverse=True)
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
40 |
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def match_colors():
    """
    Generate, for every hit, the coloured segments of its query coverage bar.

    Each yielded dict has three entries: ``colors`` is a list of
    (width, colour) tuples, where the width is a percentage of the query
    length and the colour is a name from ``colors`` or 'none' for query
    positions that no HSP covers; ``link`` is the anchor "#hit<Hit_num>";
    ``defline`` is the hit's Hit_def element.
    """

    percent_per_position = 100 / query_length

    def color_name(index):
        # 255 is the sentinel for query positions not covered by any HSP.
        return 'none' if index == 255 else colors[index]

    for hit in hits:
        # Paint the HSPs from shortest to longest, so that the colour of a
        # long alignment wins wherever it overlaps a shorter one.
        hsps_by_length = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len'])
        table = bytearray([255]) * query_length
        for hsp in hsps_by_length:
            start = hsp['Hsp_query-from'] - 1
            stop = int(hsp['Hsp_query-to'])
            table[start:stop] = repeat(color_idx(hsp['Hsp_align-len']), stop - start)

        # Run-length encode the per-position table into (width, colour) runs.
        segments = []
        run_color = table[0]
        run_length = 0
        for position in range(query_length):
            cell = table[position]
            if cell != run_color:
                segments.append((run_length * percent_per_position, color_name(run_color)))
                run_color = cell
                run_length = 0
            run_length += 1
        # Flush the run that reaches the end of the query.
        segments.append((run_length * percent_per_position, color_name(run_color)))

        yield {'colors': segments,
               'link': "#hit" + hit.Hit_num.text,
               'defline': hit.Hit_def}
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
71 |
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
72 |
7
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
def queryscale():
    """Yield the tick marks of the ruler drawn along the query sequence.

    At most ``max_labels`` ticks are produced: one every ``skip`` positions,
    plus a final narrower one at the query end when the query length is not
    a multiple of ``skip``. Each tick is a dict with ``label`` (the 1-based
    query position) and ``width`` (the width of the segment ending at that
    tick, as a percentage of the query length).

    Nothing is yielded when the query length is not positive.
    """
    if query_length <= 0:
        # Nothing to draw; this also avoids dividing by zero below.
        return

    max_labels = 10
    skip = math.ceil(query_length / max_labels)
    percent_multiplier = 100 / query_length
    tick_width = skip * percent_multiplier

    # Step straight from tick to tick instead of testing every position.
    for position in range(skip, query_length + 1, skip):
        yield dict(label=position, width=tick_width)

    remainder = query_length % skip
    if remainder:
        yield dict(label=query_length, width=remainder * percent_multiplier)
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
82 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
83 |
7
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
def hit_info():
    """Yield one summary row per hit, in the order of ``ordered_hits``.

    Each row is a dict with the keys ``description`` (Hit_def),
    ``maxscore`` and ``totalscore`` (maximum and sum of the HSP bit
    scores), ``cover`` (share of query positions covered by at least one
    HSP, formatted as a percentage), ``e_value`` (lowest HSP e-value),
    ``ident`` (lowest per-HSP identity ratio, formatted as a percentage)
    and ``accession`` (Hit_accession).
    """

    for hit in ordered_hits:
        hsps = hit.Hit_hsps.Hsp

        # Mark every query position that is covered by at least one HSP.
        cover = [False] * query_length
        for hsp in hsps:
            start = hsp['Hsp_query-from'] - 1
            stop = int(hsp['Hsp_query-to'])
            # Fill with exactly the query span. Hsp_align-len also counts
            # gap columns, and assigning a different number of items to the
            # slice would resize the list and skew the coverage figure.
            cover[start:stop] = repeat(True, stop - start)
        cover_count = cover.count(True)

        def hsp_val(path):
            # Values of one child element across all HSPs of this hit.
            return (hsp[path] for hsp in hsps)

        yield dict(description=hit.Hit_def,
                   maxscore=max(hsp_val('Hsp_bit-score')),
                   totalscore=sum(hsp_val('Hsp_bit-score')),
                   cover="{:.0%}".format(cover_count / query_length),
                   e_value=min(hsp_val('Hsp_evalue')),
                   # FIXME: is this the correct formula vv?
                   ident="{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)),
                   accession=hit.Hit_accession)
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
105 |
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
106 |
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def main():
    """Render the 'visualise.html.jinja' template and write the result to stdout."""
    template = environment.get_template('visualise.html.jinja')

    # (label, value) pairs handed to the template as `params`.
    params = (
        ('Query ID', blast["BlastOutput_query-ID"]),
        ('Query definition', blast["BlastOutput_query-def"]),
        ('Query length', blast["BlastOutput_query-len"]),
        ('Program', blast.BlastOutput_version),
        ('Database', blast.BlastOutput_db),
    )

    iteration_count = len(blast.BlastOutput_iterations.Iteration)
    if iteration_count > 1:
        warnings.warn("Multiple 'Iteration' elements found, showing only the first")

    # The three generators are created here and consumed by the template.
    context = dict(
        blast=blast,
        length=query_length,
        colors=colors,
        match_colors=match_colors(),
        queryscale=queryscale(),
        hit_info=hit_info(),
        params=params,
    )
    html = template.render(**context)
    sys.stdout.write(html)


main()