0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
1 # USAGE:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
2 # python tab2graph.py test/tab2graph_input.dat 0 output_n.dat output_r.dat \
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
3 # '[{"column":"c1","name":"person","properties":[{"column":"c2","name":"sex"},{"column":"c3","name":"age"}]}, {"column":"c4","name":"dept","properties":[{"column":"c5","name":"tel"}]}]' \
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
4 # '[{"source":"c1","target":"c4","name":"belongs_to","properties":[{"column":"c7","name":"year"}]}]'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
5
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
6 import sys, time, json, sqlite_lib
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
7
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
8 argvs = sys.argv
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
9
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
10 input_file = argvs[1]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
11 input_header = argvs[2]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
12 output_file_n = argvs[3]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
13 output_file_r = argvs[4]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
14 nodes_json = argvs[5]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
15 relations_json = argvs[6]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
16
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
17 nodes = json.loads(nodes_json)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
18 relations = json.loads(relations_json)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
19
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
20 print(nodes_json)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
21 print(nodes)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
22 print(relations_json)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
23 print(relations)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
24
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
25 conn = sqlite_lib.connect()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
26 sqlite_lib.load(conn, 'input_table', input_file, input_header, '')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
27 cur = conn.cursor()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
28
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
29 # CREATE NODE TABLE
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
30 str_create_n = 'CREATE TABLE node_table (id, type);'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
31 print(str_create_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
32 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
33 cur.execute(str_create_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
34 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
35
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
36 # LOAD DATA (NODE_TABLE)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
37 str_insert_n = "INSERT INTO node_table"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
38 for n in nodes:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
39 str_insert_n += " SELECT DISTINCT " + n['column'] + ", '" + n['name'] + "' FROM input_table"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
40 if n != nodes[-1]:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
41 str_insert_n += " UNION"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
42 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
43 str_insert_n += ";"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
44 print(str_insert_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
45 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
46 cur.execute(str_insert_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
47 conn.commit()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
48 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
49
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
50 # CREATE INDEX (NODE_TABLE)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
51 str_index_n = 'CREATE INDEX idx_node ON node_table(id);'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
52 print(str_index_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
53 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
54 cur.execute(str_index_n)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
55 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
56
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
57 # SELECT (NODE)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
58 sql = ""
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
59 for n in nodes:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
60 properties = n['properties']
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
61 if len(properties) == 0:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
62 sql_properties = ", '{}'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
63 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
64 sql_properties = ", '{'||"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
65 for p in properties:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
66 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
67 if p != properties[-1]:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
68 sql_properties += "||','||"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
69 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
70 sql_properties += "||'}'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
71 sql += "SELECT DISTINCT n.rowid, n.type, n.id" + sql_properties + "\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
72 sql += "FROM input_table i, node_table n\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
73 sql += "WHERE n.id = i." + n['column'] + " AND type = '" + n['name'] + "'\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
74 if n != nodes[-1]:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
75 sql += " UNION ALL\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
76 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
77 sql += ";"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
78 sqlite_lib.execute(conn, sql, output_file_n, '0', '0')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
79
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
80 # SELECT (RELATION)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
81 sql = ""
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
82 for r in relations:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
83 properties = r['properties']
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
84 if len(properties) == 0:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
85 sql_properties = ", '{}'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
86 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
87 sql_properties = ", '{'||"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
88 for p in properties:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
89 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
90 if p != properties[-1]:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
91 sql_properties += "||','||"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
92 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
93 sql_properties += "||'}'"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
94 sql += "SELECT DISTINCT n1.rowid, n2.rowid, '" + r['name'] + "'" + sql_properties + "\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
95 sql += "FROM input_table i, node_table n1, node_table n2\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
96 sql += "WHERE n1.id = i." + r['source'] + " AND n2.id = i." + r['target'] + "\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
97 if r != relations[-1]:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
98 sql += " UNION ALL\n"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
99 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
100 sql += ";"
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
101 sqlite_lib.execute(conn, sql, output_file_r, '0', '0')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
102
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
103 cur.close();
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
104 conn.close();
|