Mercurial > repos > jankanis > blast2html2
comparison visualise.py @ 10:2fbdf2eb27b4
All data is displayed now, still some formatting to do
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Fri, 09 May 2014 18:16:48 +0200 |
parents | 9e7927673089 |
children | 7660519f2dc9 |
comparison
equal
deleted
inserted
replaced
9:bbdc8fb0de2b | 10:2fbdf2eb27b4 |
---|---|
9 from itertools import repeat | 9 from itertools import repeat |
10 from lxml import objectify | 10 from lxml import objectify |
11 import jinja2 | 11 import jinja2 |
12 | 12 |
13 | 13 |
14 blast = objectify.parse('blast xml example1.xml').getroot() | |
15 loader = jinja2.FileSystemLoader(searchpath='.') | |
16 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) | |
17 | |
18 def filter(func_or_name): | |
19 if isinstance(func_or_name, str): | |
20 def inner(func): | |
21 environment.filters[func_or_name] = func | |
22 return func | |
23 return inner | |
24 else: | |
25 environment.filters[func_or_name.__name__] = func_or_name | |
26 return func_or_name | |
27 | |
28 | |
14 def color_idx(length): | 29 def color_idx(length): |
15 if length < 40: | 30 if length < 40: |
16 return 0 | 31 return 0 |
17 elif length < 50: | 32 elif length < 50: |
18 return 1 | 33 return 1 |
20 return 2 | 35 return 2 |
21 elif length < 200: | 36 elif length < 200: |
22 return 3 | 37 return 3 |
23 return 4 | 38 return 4 |
24 | 39 |
25 | |
26 colors = ['black', 'blue', 'green', 'magenta', 'red'] | 40 colors = ['black', 'blue', 'green', 'magenta', 'red'] |
27 | 41 |
28 blast = objectify.parse('blast xml example1.xml').getroot() | |
29 loader = jinja2.FileSystemLoader(searchpath='.') | |
30 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) | |
31 environment.filters['color'] = lambda length: match_colors[color_idx(length)] | 42 environment.filters['color'] = lambda length: match_colors[color_idx(length)] |
43 | |
44 @filter | |
45 def fmt(val, fmt): | |
46 return format(float(val), fmt) | |
47 | |
48 @filter | |
49 def firsttitle(hit): | |
50 return hit.Hit_def.text.split('>')[0] | |
51 | |
52 @filter | |
53 def othertitles(hit): | |
54 """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles.""" | |
55 id_titles = hit.Hit_def.text.split('>') | |
56 | |
57 titles = [] | |
58 for t in id_titles[1:]: | |
59 fullid, title = t.split(' ', 1) | |
60 id = fullid.split('|', 2)[2] | |
61 titles.append(dict(id = id, | |
62 fullid = fullid, | |
63 title = title)) | |
64 return titles | |
65 | |
66 @filter | |
67 def hitid(hit): | |
68 return hit.Hit_id.text.split('|', 2)[1] | |
69 | |
70 @filter | |
71 def seqid(hit): | |
72 return hit.Hit_id.text.split('|', 2)[2] | |
73 | |
74 @filter | |
75 def alignment_pre(hsp): | |
76 return ( | |
77 "Query {:>7s} {} {}\n".format(hsp['Hsp_query-from'], hsp.Hsp_qseq, hsp['Hsp_query-to']) + | |
78 " {:7s} {}\n".format('', hsp.Hsp_midline) + | |
79 "Subject {:>7s} {} {}".format(hsp['Hsp_hit-from'], hsp.Hsp_hseq, hsp['Hsp_hit-to'])) | |
80 | |
81 @filter('len') | |
82 def hsplen(node): | |
83 return int(node['Hsp_align-len']) | |
84 | |
85 @filter | |
86 def asframe(frame): | |
87 if frame == 1: | |
88 return 'Plus' | |
89 elif frame == -1: | |
90 return 'Minus' | |
91 raise Exception("frame should be either +1 or -1") | |
92 | |
32 | 93 |
33 query_length = int(blast["BlastOutput_query-len"]) | 94 query_length = int(blast["BlastOutput_query-len"]) |
34 | 95 |
35 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit | 96 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit |
36 # sort hits by longest hotspot first | 97 # sort hits by longest hotspot first |
37 ordered_hits = sorted(hits, | 98 ordered_hits = sorted(hits, |
38 key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), | 99 key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp), |
39 reverse=True) | 100 reverse=True) |
40 | 101 |
41 def match_colors(): | 102 def match_colors(): |
42 """ | 103 """ |
43 An iterator that yields lists of length-color pairs. | 104 An iterator that yields lists of length-color pairs. |
46 percent_multiplier = 100 / query_length | 107 percent_multiplier = 100 / query_length |
47 | 108 |
48 for hit in hits: | 109 for hit in hits: |
49 # sort hotspots from short to long, so we can overwrite index colors of | 110 # sort hotspots from short to long, so we can overwrite index colors of |
50 # short matches with those of long ones. | 111 # short matches with those of long ones. |
51 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) | 112 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp)) |
52 table = bytearray([255]) * query_length | 113 table = bytearray([255]) * query_length |
53 for hsp in hotspots: | 114 for hsp in hotspots: |
54 frm = hsp['Hsp_query-from'] - 1 | 115 frm = hsp['Hsp_query-from'] - 1 |
55 to = int(hsp['Hsp_query-to']) | 116 to = int(hsp['Hsp_query-to']) |
56 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) | 117 table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm) |
57 | 118 |
58 matches = [] | 119 matches = [] |
59 last = table[0] | 120 last = table[0] |
60 count = 0 | 121 count = 0 |
61 for i in range(query_length): | 122 for i in range(query_length): |
65 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 126 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
66 last = table[i] | 127 last = table[i] |
67 count = 1 | 128 count = 1 |
68 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 129 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
69 | 130 |
70 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def) | 131 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit)) |
71 | 132 |
72 | 133 |
73 def queryscale(): | 134 def queryscale(): |
74 max_labels = 10 | 135 max_labels = 10 |
75 skip = math.ceil(query_length / max_labels) | 136 skip = math.ceil(query_length / max_labels) |
86 for hit in ordered_hits: | 147 for hit in ordered_hits: |
87 hsps = hit.Hit_hsps.Hsp | 148 hsps = hit.Hit_hsps.Hsp |
88 | 149 |
89 cover = [False] * query_length | 150 cover = [False] * query_length |
90 for hsp in hsps: | 151 for hsp in hsps: |
91 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len'])) | 152 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp)) |
92 cover_count = cover.count(True) | 153 cover_count = cover.count(True) |
93 | 154 |
94 def hsp_val(path): | 155 def hsp_val(path): |
95 return (hsp[path] for hsp in hsps) | 156 return (hsp[path] for hsp in hsps) |
96 | 157 |
97 yield dict(description = hit.Hit_def, | 158 yield dict(title = firsttitle(hit), |
98 maxscore = max(hsp_val('Hsp_bit-score')), | 159 link_id = hit.Hit_num, |
99 totalscore = sum(hsp_val('Hsp_bit-score')), | 160 maxscore = "{:.1f}".format(float(max(hsp_val('Hsp_bit-score')))), |
161 totalscore = "{:.1f}".format(float(sum(hsp_val('Hsp_bit-score')))), | |
100 cover = "{:.0%}".format(cover_count / query_length), | 162 cover = "{:.0%}".format(cover_count / query_length), |
101 e_value = min(hsp_val('Hsp_evalue')), | 163 e_value = "{:.4g}".format(float(min(hsp_val('Hsp_evalue')))), |
102 # FIXME: is this the correct formula vv? | 164 # FIXME: is this the correct formula vv? |
103 ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)), | 165 ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))), |
104 accession = hit.Hit_accession) | 166 accession = hit.Hit_accession) |
105 | 167 |
106 | 168 |
107 def main(): | 169 def main(): |
108 template = environment.get_template('visualise.html.jinja') | 170 template = environment.get_template('visualise.html.jinja') |
109 | 171 |
110 params = (('Query ID', blast["BlastOutput_query-ID"]), | 172 params = (('Query ID', blast["BlastOutput_query-ID"]), |
111 ('Query definition', blast["BlastOutput_query-def"]), | 173 ('Query definition', blast["BlastOutput_query-def"]), |
117 if len(blast.BlastOutput_iterations.Iteration) > 1: | 179 if len(blast.BlastOutput_iterations.Iteration) > 1: |
118 warnings.warn("Multiple 'Iteration' elements found, showing only the first") | 180 warnings.warn("Multiple 'Iteration' elements found, showing only the first") |
119 | 181 |
120 sys.stdout.write(template.render(blast=blast, | 182 sys.stdout.write(template.render(blast=blast, |
121 length=query_length, | 183 length=query_length, |
122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, | 184 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, |
123 colors=colors, | 185 colors=colors, |
124 match_colors=match_colors(), | 186 match_colors=match_colors(), |
125 queryscale=queryscale(), | 187 queryscale=queryscale(), |
126 hit_info=hit_info(), | 188 hit_info=hit_info(), |
127 params=params)) | 189 params=params)) |