diff bin/parse_pact_tree.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/parse_pact_tree.py	Mon Oct 12 17:43:33 2015 -0400
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+import argparse
+import csv
+import re
+
+
+# Matches a "{x,y}" coordinate token and captures x and y, each a run of digits, "-" and ".".
+# Raw string so the regex escapes are not read as (invalid) string escape sequences.
+coord_re = re.compile(r"\{([-\d\.]+),([-\d\.]+)\}")
+
+
+def parse_rules(handle):
+    """Parse the out.rules results from PACT into a very raw and literal dict.
+
+    `handle` is an iterator over the lines of the rules file. Six lines are consumed, in
+    this order: tips, trunk nodes, connections, labels, coordinates, tip names.
+
+    Returns a dict with keys `tips` and `trunk_nodes` (lists of ints) and `connections`,
+    `labels`, `coordinates` and `tip_names` (dicts built from whitespace-separated `a->b`
+    tokens). Map keys, and the values of `connections` and `labels`, are converted to int
+    when possible; `coordinates` values are (float, float) tuples; `tip_names` values are
+    strings.
+
+    Raises ValueError on malformed input (for example a coordinate that is not of the
+    `{x,y}` form) and StopIteration if `handle` runs out of lines early.
+    """
+    def get_nodes(line):
+        return [int(x) for x in line.split()]
+
+    def int_if_intable(a):
+        try:
+            return int(a)
+        except (TypeError, ValueError):
+            # Not an integer literal: keep the value unchanged.
+            return a
+
+    def get_map(line, imgfn=int_if_intable):
+        pairs = (token.split('->') for token in line.split())
+        return {int_if_intable(a): imgfn(b) for a, b in pairs}
+
+    def parse_coordinate(text):
+        m = coord_re.match(text)
+        if m is None:
+            # Report the offending token instead of failing with AttributeError on None.
+            raise ValueError("malformed coordinate: %r" % (text,))
+        return (float(m.group(1)), float(m.group(2)))
+
+    # The builtin next() works on Python 2.6+ and 3; handle.next() exists only on Python 2.
+    tips = get_nodes(next(handle))
+    trunk_nodes = get_nodes(next(handle))
+    connections = get_map(next(handle))
+    labels = get_map(next(handle))
+    coordinates = get_map(next(handle), parse_coordinate)
+    tip_names = get_map(next(handle), str)
+    return dict(tips=tips, trunk_nodes=trunk_nodes, connections=connections, labels=labels,
+            coordinates=coordinates, tip_names=tip_names)
+
+
+def get_row(parsed_tree, n_id):
+    """Build the output-table row for node `n_id`.
+
+    `parsed_tree` supplies the `tips`, `tip_names`, `connections`, `labels` and
+    `coordinates` entries that are looked up here. The result is a dict with keys id,
+    parent_id, label, klass, name, x, y, parent_x and parent_y.
+
+    `klass` is "tip" for ids listed in `tips` and "trunk" otherwise; a node with no entry
+    in `connections` is classed "root" instead and is reported as its own parent. `name`
+    is taken from `tip_names` for tips and is "" for every other node.
+    """
+    is_tip = n_id in parsed_tree['tips']
+    kind = "tip" if is_tip else "trunk"
+    node_name = parsed_tree['tip_names'][n_id] if is_tip else ""
+
+    try:
+        parent = parsed_tree['connections'][n_id]
+    except KeyError:
+        # No recorded parent: the node is the root and stands in as its own parent.
+        parent, kind = n_id, "root"
+    node_label = parsed_tree['labels'][n_id]
+
+    coords = parsed_tree['coordinates']
+    node_x, node_y = coords[n_id]
+    par_x, par_y = coords[parent]
+
+    return dict(id=n_id, klass=kind, name=node_name, parent_id=parent, x=node_x, y=node_y,
+            parent_x=par_x, parent_y=par_y, label=node_label)
+
+
+def parsed_to_table(parsed_tree):
+    """Yield one row dict per node, in the iteration order of `parsed_tree['coordinates']`.
+
+    Each row is produced by `get_row`, so its keys are: id, parent_id, label, klass, name,
+    x, y, parent_x, parent_y.
+    """
+    for node in parsed_tree['coordinates']:
+        yield get_row(parsed_tree, node)
+
+
+def get_args():
+    """Parse the command-line arguments.
+
+    Returns the `argparse` namespace with two positional arguments: `input`, a file
+    opened for reading, and `output`, a file opened for writing.
+    """
+    cli = argparse.ArgumentParser(
+        prog="parse_pact_tree.py",
+        description="""Utility for parsing the output of PACT into a form renderable by ggplot""",
+    )
+    # Positional arguments in command-line order, each with the mode its file is opened in.
+    for dest, mode in (('input', 'r'), ('output', 'w')):
+        cli.add_argument(dest, type=argparse.FileType(mode))
+    return cli.parse_args()
+
+
+def main():
+    """Parse a PACT rules file and write its nodes out as a CSV table.
+
+    Takes the opened `input` and `output` files from `get_args`, parses the input with
+    `parse_rules`, then writes a header line followed by one row per node produced by
+    `parsed_to_table`. Both files are closed before returning.
+    """
+    args = get_args()
+    columns = ["id", "parent_id", "label", "klass", "name", "x", "y", "parent_x", "parent_y"]
+
+    # try/finally so both files are closed even if parsing or writing raises.
+    try:
+        data = parse_rules(args.input)
+        writer = csv.DictWriter(args.output, fieldnames=columns)
+        writer.writeheader()
+        writer.writerows(parsed_to_table(data))
+    finally:
+        args.input.close()
+        args.output.close()
+
+
+# Run the conversion only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
+
+
+