Mercurial > repos > jankanis > blast2html2
comparison visualise.py @ 10:2fbdf2eb27b4
All data is displayed now, still some formatting to do
| author | Jan Kanis <jan.code@jankanis.nl> |
|---|---|
| date | Fri, 09 May 2014 18:16:48 +0200 |
| parents | 9e7927673089 |
| children | 7660519f2dc9 |
comparison
equal
deleted
inserted
replaced
| 9:bbdc8fb0de2b | 10:2fbdf2eb27b4 |
|---|---|
| 9 from itertools import repeat | 9 from itertools import repeat |
| 10 from lxml import objectify | 10 from lxml import objectify |
| 11 import jinja2 | 11 import jinja2 |
| 12 | 12 |
| 13 | 13 |
| 14 blast = objectify.parse('blast xml example1.xml').getroot() | |
| 15 loader = jinja2.FileSystemLoader(searchpath='.') | |
| 16 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) | |
| 17 | |
| 18 def filter(func_or_name): | |
| 19 if isinstance(func_or_name, str): | |
| 20 def inner(func): | |
| 21 environment.filters[func_or_name] = func | |
| 22 return func | |
| 23 return inner | |
| 24 else: | |
| 25 environment.filters[func_or_name.__name__] = func_or_name | |
| 26 return func_or_name | |
| 27 | |
| 28 | |
| 14 def color_idx(length): | 29 def color_idx(length): |
| 15 if length < 40: | 30 if length < 40: |
| 16 return 0 | 31 return 0 |
| 17 elif length < 50: | 32 elif length < 50: |
| 18 return 1 | 33 return 1 |
| 20 return 2 | 35 return 2 |
| 21 elif length < 200: | 36 elif length < 200: |
| 22 return 3 | 37 return 3 |
| 23 return 4 | 38 return 4 |
| 24 | 39 |
| 25 | |
| 26 colors = ['black', 'blue', 'green', 'magenta', 'red'] | 40 colors = ['black', 'blue', 'green', 'magenta', 'red'] |
| 27 | 41 |
| 28 blast = objectify.parse('blast xml example1.xml').getroot() | |
| 29 loader = jinja2.FileSystemLoader(searchpath='.') | |
| 30 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) | |
| 31 environment.filters['color'] = lambda length: match_colors[color_idx(length)] | 42 environment.filters['color'] = lambda length: match_colors[color_idx(length)] |
| 43 | |
| 44 @filter | |
| 45 def fmt(val, fmt): | |
| 46 return format(float(val), fmt) | |
| 47 | |
| 48 @filter | |
| 49 def firsttitle(hit): | |
| 50 return hit.Hit_def.text.split('>')[0] | |
| 51 | |
| 52 @filter | |
| 53 def othertitles(hit): | |
| 54 """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles.""" | |
| 55 id_titles = hit.Hit_def.text.split('>') | |
| 56 | |
| 57 titles = [] | |
| 58 for t in id_titles[1:]: | |
| 59 fullid, title = t.split(' ', 1) | |
| 60 id = fullid.split('|', 2)[2] | |
| 61 titles.append(dict(id = id, | |
| 62 fullid = fullid, | |
| 63 title = title)) | |
| 64 return titles | |
| 65 | |
| 66 @filter | |
| 67 def hitid(hit): | |
| 68 return hit.Hit_id.text.split('|', 2)[1] | |
| 69 | |
| 70 @filter | |
| 71 def seqid(hit): | |
| 72 return hit.Hit_id.text.split('|', 2)[2] | |
| 73 | |
| 74 @filter | |
| 75 def alignment_pre(hsp): | |
| 76 return ( | |
| 77 "Query {:>7s} {} {}\n".format(hsp['Hsp_query-from'], hsp.Hsp_qseq, hsp['Hsp_query-to']) + | |
| 78 " {:7s} {}\n".format('', hsp.Hsp_midline) + | |
| 79 "Subject {:>7s} {} {}".format(hsp['Hsp_hit-from'], hsp.Hsp_hseq, hsp['Hsp_hit-to'])) | |
| 80 | |
| 81 @filter('len') | |
| 82 def hsplen(node): | |
| 83 return int(node['Hsp_align-len']) | |
| 84 | |
| 85 @filter | |
| 86 def asframe(frame): | |
| 87 if frame == 1: | |
| 88 return 'Plus' | |
| 89 elif frame == -1: | |
| 90 return 'Minus' | |
| 91 raise Exception("frame should be either +1 or -1") | |
| 92 | |
| 32 | 93 |
| 33 query_length = int(blast["BlastOutput_query-len"]) | 94 query_length = int(blast["BlastOutput_query-len"]) |
| 34 | 95 |
| 35 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit | 96 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit |
| 36 # sort hits by longest hotspot first | 97 # sort hits by longest hotspot first |
| 37 ordered_hits = sorted(hits, | 98 ordered_hits = sorted(hits, |
| 38 key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), | 99 key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp), |
| 39 reverse=True) | 100 reverse=True) |
| 40 | 101 |
| 41 def match_colors(): | 102 def match_colors(): |
| 42 """ | 103 """ |
| 43 An iterator that yields lists of length-color pairs. | 104 An iterator that yields lists of length-color pairs. |
| 46 percent_multiplier = 100 / query_length | 107 percent_multiplier = 100 / query_length |
| 47 | 108 |
| 48 for hit in hits: | 109 for hit in hits: |
| 49 # sort hotspots from short to long, so we can overwrite index colors of | 110 # sort hotspots from short to long, so we can overwrite index colors of |
| 50 # short matches with those of long ones. | 111 # short matches with those of long ones. |
| 51 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) | 112 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp)) |
| 52 table = bytearray([255]) * query_length | 113 table = bytearray([255]) * query_length |
| 53 for hsp in hotspots: | 114 for hsp in hotspots: |
| 54 frm = hsp['Hsp_query-from'] - 1 | 115 frm = hsp['Hsp_query-from'] - 1 |
| 55 to = int(hsp['Hsp_query-to']) | 116 to = int(hsp['Hsp_query-to']) |
| 56 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) | 117 table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm) |
| 57 | 118 |
| 58 matches = [] | 119 matches = [] |
| 59 last = table[0] | 120 last = table[0] |
| 60 count = 0 | 121 count = 0 |
| 61 for i in range(query_length): | 122 for i in range(query_length): |
| 65 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 126 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
| 66 last = table[i] | 127 last = table[i] |
| 67 count = 1 | 128 count = 1 |
| 68 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) | 129 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) |
| 69 | 130 |
| 70 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def) | 131 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit)) |
| 71 | 132 |
| 72 | 133 |
| 73 def queryscale(): | 134 def queryscale(): |
| 74 max_labels = 10 | 135 max_labels = 10 |
| 75 skip = math.ceil(query_length / max_labels) | 136 skip = math.ceil(query_length / max_labels) |
| 86 for hit in ordered_hits: | 147 for hit in ordered_hits: |
| 87 hsps = hit.Hit_hsps.Hsp | 148 hsps = hit.Hit_hsps.Hsp |
| 88 | 149 |
| 89 cover = [False] * query_length | 150 cover = [False] * query_length |
| 90 for hsp in hsps: | 151 for hsp in hsps: |
| 91 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len'])) | 152 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp)) |
| 92 cover_count = cover.count(True) | 153 cover_count = cover.count(True) |
| 93 | 154 |
| 94 def hsp_val(path): | 155 def hsp_val(path): |
| 95 return (hsp[path] for hsp in hsps) | 156 return (hsp[path] for hsp in hsps) |
| 96 | 157 |
| 97 yield dict(description = hit.Hit_def, | 158 yield dict(title = firsttitle(hit), |
| 98 maxscore = max(hsp_val('Hsp_bit-score')), | 159 link_id = hit.Hit_num, |
| 99 totalscore = sum(hsp_val('Hsp_bit-score')), | 160 maxscore = "{:.1f}".format(float(max(hsp_val('Hsp_bit-score')))), |
| 161 totalscore = "{:.1f}".format(float(sum(hsp_val('Hsp_bit-score')))), | |
| 100 cover = "{:.0%}".format(cover_count / query_length), | 162 cover = "{:.0%}".format(cover_count / query_length), |
| 101 e_value = min(hsp_val('Hsp_evalue')), | 163 e_value = "{:.4g}".format(float(min(hsp_val('Hsp_evalue')))), |
| 102 # FIXME: is this the correct formula vv? | 164 # FIXME: is this the correct formula vv? |
| 103 ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)), | 165 ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))), |
| 104 accession = hit.Hit_accession) | 166 accession = hit.Hit_accession) |
| 105 | 167 |
| 106 | 168 |
| 107 def main(): | 169 def main(): |
| 108 template = environment.get_template('visualise.html.jinja') | 170 template = environment.get_template('visualise.html.jinja') |
| 109 | 171 |
| 110 params = (('Query ID', blast["BlastOutput_query-ID"]), | 172 params = (('Query ID', blast["BlastOutput_query-ID"]), |
| 111 ('Query definition', blast["BlastOutput_query-def"]), | 173 ('Query definition', blast["BlastOutput_query-def"]), |
| 117 if len(blast.BlastOutput_iterations.Iteration) > 1: | 179 if len(blast.BlastOutput_iterations.Iteration) > 1: |
| 118 warnings.warn("Multiple 'Iteration' elements found, showing only the first") | 180 warnings.warn("Multiple 'Iteration' elements found, showing only the first") |
| 119 | 181 |
| 120 sys.stdout.write(template.render(blast=blast, | 182 sys.stdout.write(template.render(blast=blast, |
| 121 length=query_length, | 183 length=query_length, |
| 122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, | 184 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, |
| 123 colors=colors, | 185 colors=colors, |
| 124 match_colors=match_colors(), | 186 match_colors=match_colors(), |
| 125 queryscale=queryscale(), | 187 queryscale=queryscale(), |
| 126 hit_info=hit_info(), | 188 hit_info=hit_info(), |
| 127 params=params)) | 189 params=params)) |
