Mercurial > repos > ryotas > cypher_tools
view tab2graph.py @ 0:aa7a5cc0f59b default tip
commit
author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
---|---|
date | Tue, 30 Dec 2014 18:27:26 +0900 |
parents | |
children |
line wrap: on
line source
# USAGE: # python tab2graph.py test/tab2graph_input.dat 0 output_n.dat output_r.dat \ # '[{"column":"c1","name":"person","properties":[{"column":"c2","name":"sex"},{"column":"c3","name":"age"}]}, {"column":"c4","name":"dept","properties":[{"column":"c5","name":"tel"}]}]' \ # '[{"source":"c1","target":"c4","name":"belongs_to","properties":[{"column":"c7","name":"year"}]}]' import sys, time, json, sqlite_lib argvs = sys.argv input_file = argvs[1] input_header = argvs[2] output_file_n = argvs[3] output_file_r = argvs[4] nodes_json = argvs[5] relations_json = argvs[6] nodes = json.loads(nodes_json) relations = json.loads(relations_json) print(nodes_json) print(nodes) print(relations_json) print(relations) conn = sqlite_lib.connect() sqlite_lib.load(conn, 'input_table', input_file, input_header, '') cur = conn.cursor() # CREATE NODE TABLE str_create_n = 'CREATE TABLE node_table (id, type);' print(str_create_n) time_start = time.time() cur.execute(str_create_n) print('Elapsed Time: ' + str(time.time() - time_start) + '\n') # LOAD DATA (NODE_TABLE) str_insert_n = "INSERT INTO node_table" for n in nodes: str_insert_n += " SELECT DISTINCT " + n['column'] + ", '" + n['name'] + "' FROM input_table" if n != nodes[-1]: str_insert_n += " UNION" else: str_insert_n += ";" print(str_insert_n) time_start = time.time() cur.execute(str_insert_n) conn.commit() print('Elapsed Time: ' + str(time.time() - time_start) + '\n') # CREATE INDEX (NODE_TABLE) str_index_n = 'CREATE INDEX idx_node ON node_table(id);' print(str_index_n) time_start = time.time() cur.execute(str_index_n) print('Elapsed Time: ' + str(time.time() - time_start) + '\n') # SELECT (NODE) sql = "" for n in nodes: properties = n['properties'] if len(properties) == 0: sql_properties = ", '{}'" else: sql_properties = ", '{'||" for p in properties: sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'" if p != properties[-1]: sql_properties += "||','||" else: sql_properties += "||'}'" sql += "SELECT DISTINCT n.rowid, n.type, n.id" + sql_properties + "\n" sql += "FROM input_table i, node_table n\n" sql += "WHERE n.id = i." + n['column'] + " AND type = '" + n['name'] + "'\n" if n != nodes[-1]: sql += " UNION ALL\n" else: sql += ";" sqlite_lib.execute(conn, sql, output_file_n, '0', '0') # SELECT (RELATION) sql = "" for r in relations: properties = r['properties'] if len(properties) == 0: sql_properties = ", '{}'" else: sql_properties = ", '{'||" for p in properties: sql_properties += "'\"" + p['name'] + "\":\"'||i." + p['column'] + "||'\"'" if p != properties[-1]: sql_properties += "||','||" else: sql_properties += "||'}'" sql += "SELECT DISTINCT n1.rowid, n2.rowid, '" + r['name'] + "'" + sql_properties + "\n" sql += "FROM input_table i, node_table n1, node_table n2\n" sql += "WHERE n1.id = i." + r['source'] + " AND n2.id = i." + r['target'] + "\n" if r != relations[-1]: sql += " UNION ALL\n" else: sql += ";" sqlite_lib.execute(conn, sql, output_file_r, '0', '0') cur.close(); conn.close();