annotate tab2graph.py @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
1 # USAGE:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
2 # python tab2graph.py test/tab2graph_input.dat 0 output_n.dat output_r.dat \
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
3 # '[{"column":"c1","name":"person","properties":[{"column":"c2","name":"sex"},{"column":"c3","name":"age"}]}, {"column":"c4","name":"dept","properties":[{"column":"c5","name":"tel"}]}]' \
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
4 # '[{"source":"c1","target":"c4","name":"belongs_to","properties":[{"column":"c7","name":"year"}]}]'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
5
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
6 import sys, time, json, sqlite_lib
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
7
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
8 argvs = sys.argv
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
9
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
10 input_file = argvs[1]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
11 input_header = argvs[2]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
12 output_file_n = argvs[3]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
13 output_file_r = argvs[4]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
14 nodes_json = argvs[5]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
15 relations_json = argvs[6]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
16
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
17 nodes = json.loads(nodes_json)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
18 relations = json.loads(relations_json)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
19
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
20 print(nodes_json)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
21 print(nodes)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
22 print(relations_json)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
23 print(relations)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
24
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
25 conn = sqlite_lib.connect()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
26 sqlite_lib.load(conn, 'input_table', input_file, input_header, '')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
27 cur = conn.cursor()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
28
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
29 # CREATE NODE TABLE
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
30 str_create_n = 'CREATE TABLE node_table (id, type);'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
31 print(str_create_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
32 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
33 cur.execute(str_create_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
34 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
35
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
36 # LOAD DATA (NODE_TABLE)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
37 str_insert_n = "INSERT INTO node_table"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
38 for n in nodes:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
39 str_insert_n += " SELECT DISTINCT " + n['column'] + ", '" + n['name'] + "' FROM input_table"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
40 if n != nodes[-1]:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
41 str_insert_n += " UNION"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
42 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
43 str_insert_n += ";"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
44 print(str_insert_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
45 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
46 cur.execute(str_insert_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
47 conn.commit()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
48 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
49
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
50 # CREATE INDEX (NODE_TABLE)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
51 str_index_n = 'CREATE INDEX idx_node ON node_table(id);'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
52 print(str_index_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
53 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
54 cur.execute(str_index_n)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
55 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
56
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
57 # SELECT (NODE)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
58 sql = ""
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
59 for n in nodes:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
60 properties = n['properties']
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
61 if len(properties) == 0:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
62 sql_properties = ", '{}'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
63 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
64 sql_properties = ", '{'||"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
65 for p in properties:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
66 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
67 if p != properties[-1]:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
68 sql_properties += "||','||"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
69 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
70 sql_properties += "||'}'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
71 sql += "SELECT DISTINCT n.rowid, n.type, n.id" + sql_properties + "\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
72 sql += "FROM input_table i, node_table n\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
73 sql += "WHERE n.id = i." + n['column'] + " AND type = '" + n['name'] + "'\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
74 if n != nodes[-1]:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
75 sql += " UNION ALL\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
76 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
77 sql += ";"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
78 sqlite_lib.execute(conn, sql, output_file_n, '0', '0')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
79
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
80 # SELECT (RELATION)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
81 sql = ""
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
82 for r in relations:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
83 properties = r['properties']
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
84 if len(properties) == 0:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
85 sql_properties = ", '{}'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
86 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
87 sql_properties = ", '{'||"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
88 for p in properties:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
89 sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
90 if p != properties[-1]:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
91 sql_properties += "||','||"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
92 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
93 sql_properties += "||'}'"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
94 sql += "SELECT DISTINCT n1.rowid, n2.rowid, '" + r['name'] + "'" + sql_properties + "\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
95 sql += "FROM input_table i, node_table n1, node_table n2\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
96 sql += "WHERE n1.id = i." + r['source'] + " AND n2.id = i." + r['target'] + "\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
97 if r != relations[-1]:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
98 sql += " UNION ALL\n"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
99 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
100 sql += ";"
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
101 sqlite_lib.execute(conn, sql, output_file_r, '0', '0')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
102
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
103 cur.close();
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
104 conn.close();