comparison bin/parse_pact_tree.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d67268158946
1 #!/usr/bin/env python
2 import argparse
3 import csv
4 import re
5
6
# Matches a coordinate token such as "{1.5,-2.0}" and captures the two numbers.
# Written as a raw string so the regex escapes are not read as string escapes.
coord_re = re.compile(r"\{([-\d\.]+),([-\d\.]+)\}")


def parse_rules(handle):
    """Return a very raw and literal translation of the out.rules results from PACT.

    Exactly six lines are consumed from ``handle``, in this order: tips,
    trunk nodes, connections, labels, coordinates, tip names.  The first two
    lines are whitespace-separated integers; the remaining four are
    whitespace-separated ``key->value`` tokens.

    Args:
        handle: an iterator over the lines of the rules file (for example an
            open text file).

    Returns:
        A dict with the keys ``tips`` and ``trunk_nodes`` (lists of int) and
        ``connections``, ``labels``, ``coordinates``, ``tip_names`` (dicts).
        Map keys are ints when they look like ints, otherwise strings.
        ``coordinates`` values are ``(float, float)`` tuples, ``tip_names``
        values are always strings, and the other map values are ints when
        they look like ints, otherwise strings.

    Raises:
        StopIteration: if ``handle`` yields fewer than six lines.
        ValueError: if a node id is not an integer, a token is not a single
            ``key->value`` pair, or a coordinate is malformed.
    """
    def get_nodes(line):
        return [int(x) for x in line.split()]

    def int_if_intable(a):
        # Only a failed conversion means "leave it as text"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        try:
            return int(a)
        except ValueError:
            return a

    def get_map(line, imgfn=int_if_intable):
        pairs = (token.split('->') for token in line.split())
        return {int_if_intable(key): imgfn(value) for key, value in pairs}

    def parse_coordinate(text):
        m = coord_re.match(text)
        if m is None:
            # Fail with the offending token instead of an AttributeError on None.
            raise ValueError("malformed coordinate: %r" % (text,))
        return (float(m.group(1)), float(m.group(2)))

    # The builtin next() works on Python 2.6+ and Python 3 alike, whereas the
    # .next() method only exists on Python 2 iterators.
    tips = get_nodes(next(handle))
    trunk_nodes = get_nodes(next(handle))
    connections = get_map(next(handle))
    labels = get_map(next(handle))
    coordinates = get_map(next(handle), parse_coordinate)
    tip_names = get_map(next(handle), str)
    return dict(tips=tips, trunk_nodes=trunk_nodes, connections=connections, labels=labels,
                coordinates=coordinates, tip_names=tip_names)
37
38
def get_row(parsed_tree, n_id):
    """Build the row (as seen in the final table) for the node ``n_id``.

    A node listed in ``parsed_tree['tips']`` is classed ``"tip"`` and takes
    its name from ``parsed_tree['tip_names']``; any other node is classed
    ``"trunk"`` with an empty name.  A node with no entry in
    ``parsed_tree['connections']`` is re-classed ``"root"`` and is recorded
    as its own parent.

    Returns a dict with the keys id, klass, name, parent_id, x, y, parent_x,
    parent_y and label.
    """
    is_tip = n_id in parsed_tree['tips']
    node_class = "tip" if is_tip else "trunk"
    node_name = parsed_tree['tip_names'][n_id] if is_tip else ""

    try:
        parent = parsed_tree['connections'][n_id]
    except KeyError:
        # No recorded parent: the node is the root and points at itself.
        parent, node_class = n_id, "root"

    node_label = parsed_tree['labels'][n_id]

    positions = parsed_tree['coordinates']
    own_x, own_y = positions[n_id]
    up_x, up_y = positions[parent]

    return {
        'id': n_id,
        'klass': node_class,
        'name': node_name,
        'parent_id': parent,
        'x': own_x,
        'y': own_y,
        'parent_x': up_x,
        'parent_y': up_y,
        'label': node_label,
    }
59
60
def parsed_to_table(parsed_tree):
    """Lazily yield one table row per node that has an entry in ``coordinates``.

    Each row is the dict produced by ``get_row`` and carries the columns:
    id, parent_id, label, klass, name, x, y, parent_x, parent_y.
    """
    node_ids = parsed_tree['coordinates']
    for node_id in node_ids:
        yield get_row(parsed_tree, node_id)
66
67
def get_args():
    """Parse the command line and return the argparse namespace.

    Two positional arguments are accepted: ``input`` (opened for reading)
    and ``output`` (opened for writing).  Both are opened by argparse, so the
    returned namespace holds file objects rather than paths.
    """
    cli = argparse.ArgumentParser(
        prog="parse_pact_tree.py",
        description="""Utility for parsing the output of PACT into a form renderable by ggplot""")
    # Positional arguments in command-line order, each with its open mode.
    for arg_name, open_mode in (('input', 'r'), ('output', 'w')):
        cli.add_argument(arg_name, type=argparse.FileType(open_mode))
    return cli.parse_args()
74
75
def main():
    """Get args, run the parser, and spit out the results into a CSV file.

    The input rules file is parsed in full, then one CSV row per node is
    written to the output with the header: id, parent_id, label, klass, name,
    x, y, parent_x, parent_y.
    """
    args = get_args()

    # Both handles were opened by argparse; the with-block guarantees they
    # are closed even if parsing or writing raises part-way through.
    with args.input, args.output:
        data = parse_rules(args.input)

        writer = csv.DictWriter(
            args.output,
            fieldnames=["id", "parent_id", "label", "klass", "name", "x", "y", "parent_x", "parent_y"])
        writer.writeheader()

        for row in parsed_to_table(data):
            writer.writerow(row)
90
91
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
94
95
96