comparison tab2graph.py @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa7a5cc0f59b
1 # USAGE:
2 # python tab2graph.py test/tab2graph_input.dat 0 output_n.dat output_r.dat \
3 # '[{"column":"c1","name":"person","properties":[{"column":"c2","name":"sex"},{"column":"c3","name":"age"}]}, {"column":"c4","name":"dept","properties":[{"column":"c5","name":"tel"}]}]' \
4 # '[{"source":"c1","target":"c4","name":"belongs_to","properties":[{"column":"c7","name":"year"}]}]'
5
6 import sys, time, json, sqlite_lib
7
8 argvs = sys.argv
9
10 input_file = argvs[1]
11 input_header = argvs[2]
12 output_file_n = argvs[3]
13 output_file_r = argvs[4]
14 nodes_json = argvs[5]
15 relations_json = argvs[6]
16
17 nodes = json.loads(nodes_json)
18 relations = json.loads(relations_json)
19
20 print(nodes_json)
21 print(nodes)
22 print(relations_json)
23 print(relations)
24
25 conn = sqlite_lib.connect()
26 sqlite_lib.load(conn, 'input_table', input_file, input_header, '')
27 cur = conn.cursor()
28
29 # CREATE NODE TABLE
30 str_create_n = 'CREATE TABLE node_table (id, type);'
31 print(str_create_n)
32 time_start = time.time()
33 cur.execute(str_create_n)
34 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
35
36 # LOAD DATA (NODE_TABLE)
37 str_insert_n = "INSERT INTO node_table"
38 for n in nodes:
39 str_insert_n += " SELECT DISTINCT " + n['column'] + ", '" + n['name'] + "' FROM input_table"
40 if n != nodes[-1]:
41 str_insert_n += " UNION"
42 else:
43 str_insert_n += ";"
44 print(str_insert_n)
45 time_start = time.time()
46 cur.execute(str_insert_n)
47 conn.commit()
48 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
49
50 # CREATE INDEX (NODE_TABLE)
51 str_index_n = 'CREATE INDEX idx_node ON node_table(id);'
52 print(str_index_n)
53 time_start = time.time()
54 cur.execute(str_index_n)
55 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
56
57 # SELECT (NODE)
58 sql = ""
59 for n in nodes:
60 properties = n['properties']
61 if len(properties) == 0:
62 sql_properties = ", '{}'"
63 else:
64 sql_properties = ", '{'||"
65 for p in properties:
66 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
67 if p != properties[-1]:
68 sql_properties += "||','||"
69 else:
70 sql_properties += "||'}'"
71 sql += "SELECT DISTINCT n.rowid, n.type, n.id" + sql_properties + "\n"
72 sql += "FROM input_table i, node_table n\n"
73 sql += "WHERE n.id = i." + n['column'] + " AND type = '" + n['name'] + "'\n"
74 if n != nodes[-1]:
75 sql += " UNION ALL\n"
76 else:
77 sql += ";"
78 sqlite_lib.execute(conn, sql, output_file_n, '0', '0')
79
80 # SELECT (RELATION)
81 sql = ""
82 for r in relations:
83 properties = r['properties']
84 if len(properties) == 0:
85 sql_properties = ", '{}'"
86 else:
87 sql_properties = ", '{'||"
88 for p in properties:
89 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
90 if p != properties[-1]:
91 sql_properties += "||','||"
92 else:
93 sql_properties += "||'}'"
94 sql += "SELECT DISTINCT n1.rowid, n2.rowid, '" + r['name'] + "'" + sql_properties + "\n"
95 sql += "FROM input_table i, node_table n1, node_table n2\n"
96 sql += "WHERE n1.id = i." + r['source'] + " AND n2.id = i." + r['target'] + "\n"
97 if r != relations[-1]:
98 sql += " UNION ALL\n"
99 else:
100 sql += ";"
101 sqlite_lib.execute(conn, sql, output_file_r, '0', '0')
102
103 cur.close();
104 conn.close();