Mercurial > repos > ryotas > cypher_tools
diff tab2graph.py @ 0:aa7a5cc0f59b default tip
commit
| author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
|---|---|
| date | Tue, 30 Dec 2014 18:27:26 +0900 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tab2graph.py Tue Dec 30 18:27:26 2014 +0900 @@ -0,0 +1,104 @@ +# USAGE: +# python tab2graph.py test/tab2graph_input.dat 0 output_n.dat output_r.dat \ +# '[{"column":"c1","name":"person","properties":[{"column":"c2","name":"sex"},{"column":"c3","name":"age"}]}, {"column":"c4","name":"dept","properties":[{"column":"c5","name":"tel"}]}]' \ +# '[{"source":"c1","target":"c4","name":"belongs_to","properties":[{"column":"c7","name":"year"}]}]' + +import sys, time, json, sqlite_lib + +argvs = sys.argv + +input_file = argvs[1] +input_header = argvs[2] +output_file_n = argvs[3] +output_file_r = argvs[4] +nodes_json = argvs[5] +relations_json = argvs[6] + +nodes = json.loads(nodes_json) +relations = json.loads(relations_json) + +print(nodes_json) +print(nodes) +print(relations_json) +print(relations) + +conn = sqlite_lib.connect() +sqlite_lib.load(conn, 'input_table', input_file, input_header, '') +cur = conn.cursor() + +# CREATE NODE TABLE +str_create_n = 'CREATE TABLE node_table (id, type);' +print(str_create_n) +time_start = time.time() +cur.execute(str_create_n) +print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + +# LOAD DATA (NODE_TABLE) +str_insert_n = "INSERT INTO node_table" +for n in nodes: + str_insert_n += " SELECT DISTINCT " + n['column'] + ", '" + n['name'] + "' FROM input_table" + if n != nodes[-1]: + str_insert_n += " UNION" + else: + str_insert_n += ";" +print(str_insert_n) +time_start = time.time() +cur.execute(str_insert_n) +conn.commit() +print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + +# CREATE INDEX (NODE_TABLE) +str_index_n = 'CREATE INDEX idx_node ON node_table(id);' +print(str_index_n) +time_start = time.time() +cur.execute(str_index_n) +print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + +# SELECT (NODE) +sql = "" +for n in nodes: + properties = n['properties'] + if len(properties) == 0: + sql_properties = ", '{}'" + else: + sql_properties = ", '{'||" + for p in properties: + sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'" + if p != properties[-1]: + sql_properties += "||','||" + else: + sql_properties += "||'}'" + sql += "SELECT DISTINCT n.rowid, n.type, n.id" + sql_properties + "\n" + sql += "FROM input_table i, node_table n\n" + sql += "WHERE n.id = i." + n['column'] + " AND type = '" + n['name'] + "'\n" + if n != nodes[-1]: + sql += " UNION ALL\n" + else: + sql += ";" +sqlite_lib.execute(conn, sql, output_file_n, '0', '0') + +# SELECT (RELATION) +sql = "" +for r in relations: + properties = r['properties'] + if len(properties) == 0: + sql_properties = ", '{}'" + else: + sql_properties = ", '{'||" + for p in properties: + sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'" + if p != properties[-1]: + sql_properties += "||','||" + else: + sql_properties += "||'}'" + sql += "SELECT DISTINCT n1.rowid, n2.rowid, '" + r['name'] + "'" + sql_properties + "\n" + sql += "FROM input_table i, node_table n1, node_table n2\n" + sql += "WHERE n1.id = i." + r['source'] + " AND n2.id = i." + r['target'] + "\n" + if r != relations[-1]: + sql += " UNION ALL\n" + else: + sql += ";" +sqlite_lib.execute(conn, sql, output_file_r, '0', '0') + +cur.close(); +conn.close();
