Mercurial > repos > bcclaywell > argo_navis
comparison bin/parse_pact_tree.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import csv | |
4 import re | |
5 | |
6 | |
# Matches a coordinate token such as "{1.5,-2}" and captures the two numbers.
coord_re = re.compile(r"\{([-\d\.]+),([-\d\.]+)\}")


def parse_rules(handle):
    """Return a raw, literal translation of PACT's ``out.rules`` output.

    Exactly six lines are consumed from ``handle``, in this order:

    1. whitespace-separated integer ids of the tips
    2. whitespace-separated integer ids of the trunk nodes
    3. ``child->parent`` connection pairs
    4. ``node->label`` pairs
    5. ``node->{x,y}`` coordinate pairs
    6. ``tip->name`` pairs (names are kept as strings)

    Keys of every map (and values of the connection and label maps) are
    converted to ``int`` when possible and otherwise left as strings.

    :param handle: an iterator over the lines of the rules file (for
        example an open file object).
    :returns: dict with keys ``tips``, ``trunk_nodes``, ``connections``,
        ``labels``, ``coordinates`` and ``tip_names``.
    :raises StopIteration: if ``handle`` yields fewer than six lines.
    :raises ValueError: if a node id is not an integer, a mapping token does
        not contain exactly one ``->``, or a coordinate is malformed.
    """
    def get_nodes(line):
        return [int(x) for x in line.split()]

    def int_if_intable(a):
        # Only conversion failures are expected here; anything else should
        # propagate instead of being silently swallowed.
        try:
            return int(a)
        except (TypeError, ValueError):
            return a

    def get_map(line, imgfn=int_if_intable):
        pairs = (x.split('->') for x in line.split())
        return dict((int_if_intable(a), imgfn(b)) for a, b in pairs)

    def parse_coordinate(text):
        m = coord_re.match(text)
        if m is None:
            raise ValueError("malformed coordinate: %r" % (text,))
        return (float(m.group(1)), float(m.group(2)))

    # next(handle) works on both Python 2 and Python 3 iterators, whereas the
    # .next() method only exists on Python 2.
    tips = get_nodes(next(handle))
    trunk_nodes = get_nodes(next(handle))
    connections = get_map(next(handle))
    labels = get_map(next(handle))
    coordinates = get_map(next(handle), parse_coordinate)
    tip_names = get_map(next(handle), str)
    return dict(tips=tips, trunk_nodes=trunk_nodes, connections=connections, labels=labels,
                coordinates=coordinates, tip_names=tip_names)
37 | |
38 | |
def get_row(parsed_tree, n_id):
    """Build the output-table row (as a dict) for the node ``n_id``.

    A node listed in ``parsed_tree['tips']`` is classed ``"tip"`` and takes
    its name from ``parsed_tree['tip_names']``; any other node is classed
    ``"trunk"`` with an empty name.  A node with no entry in
    ``parsed_tree['connections']`` is reclassed ``"root"`` and is reported as
    its own parent.
    """
    is_tip = n_id in parsed_tree['tips']
    klass = "tip" if is_tip else "trunk"
    name = parsed_tree['tip_names'][n_id] if is_tip else ""

    try:
        parent_id = parsed_tree['connections'][n_id]
    except KeyError:
        # No recorded parent: treat the node as the root, pointing at itself.
        parent_id, klass = n_id, "root"

    label = parsed_tree['labels'][n_id]

    coords = parsed_tree['coordinates']
    own_xy = coords[n_id]
    parent_xy = coords[parent_id]
    x, y = own_xy
    parent_x, parent_y = parent_xy

    return {
        'id': n_id,
        'klass': klass,
        'name': name,
        'parent_id': parent_id,
        'x': x,
        'y': y,
        'parent_x': parent_x,
        'parent_y': parent_y,
        'label': label,
    }
59 | |
60 | |
def parsed_to_table(parsed_tree):
    """Lazily yield one row dict per node that has a coordinate entry.

    Each row carries the columns
    id, parent_id, label, klass, name, x, y, parent_x, parent_y.
    """
    node_ids = parsed_tree['coordinates'].keys()
    for node_id in node_ids:
        row = get_row(parsed_tree, node_id)
        yield row
66 | |
67 | |
def get_args():
    """Parse the command line: a readable ``input`` and a writable ``output`` file."""
    parser = argparse.ArgumentParser(
        prog="parse_pact_tree.py",
        description="""Utility for parsing the output of PACT into a form renderable by ggplot""",
    )
    # Both positionals are opened by argparse itself; only the mode differs.
    for dest, mode in (('input', 'r'), ('output', 'w')):
        parser.add_argument(dest, type=argparse.FileType(mode))
    return parser.parse_args()
74 | |
75 | |
def main():
    """Read the rules file named on the command line and write it out as CSV.

    The output has a header row followed by one row per node, with the
    columns id, parent_id, label, klass, name, x, y, parent_x, parent_y.
    """
    args = get_args()

    # argparse has already opened both handles; the with-block guarantees they
    # are closed even when parsing or writing raises part-way through.
    with args.input as rules_handle, args.output as table_handle:
        data = parse_rules(rules_handle)

        writer = csv.DictWriter(
            table_handle,
            fieldnames=["id", "parent_id", "label", "klass", "name", "x", "y", "parent_x", "parent_y"])
        writer.writeheader()
        writer.writerows(parsed_to_table(data))
90 | |
91 | |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
94 | |
95 | |
96 |