Mercurial > repos > ryotas > cypher_tools
diff sqlite_lib.py @ 0:aa7a5cc0f59b default tip
commit
| author | ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp> |
|---|---|
| date | Tue, 30 Dec 2014 18:27:26 +0900 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sqlite_lib.py Tue Dec 30 18:27:26 2014 +0900 @@ -0,0 +1,140 @@ +# -*- coding:utf-8 -*- + +""" +sqlite_lib +""" + +__author__ = 'Sem4j' +__version__ = '0.0.2' +__date__ = '13 Nov 2013' + +import csv, sqlite3, time + +def connect(): + conn = sqlite3.connect(':memory:') + conn.row_factory = sqlite3.Row + conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") + return conn + +def load(conn, table_name, input_file, input_header, str_index): + + cur = conn.cursor() + + with open(input_file,'rb') as infile: + dr = csv.reader(infile, delimiter='\t') + to_db = [] + row_count = 0 + for row in dr: + row_count += 1 + values =[] + col_count = 0 + for col in row: + col_count += 1 + values.append(col) + if input_header == '1' and row_count == 1: + header = values + print('Header:') + else: + to_db.append(values) + + # PREPARE DDL&DML + str_table1 = table_name + '(' + str_table2 = table_name + '(' + str_value = 'VALUES (' + for j in range(col_count): + if input_header == '1': + print(' ' + str(j + 1) + ' ' + header[j]); + col_name = header[j] + else: + col_name = 'c' + str(j + 1) + str_table1 += col_name + ' NUMERIC' + str_table2 += col_name + ' ' + str_value = str_value + '?' + if j != col_count - 1: + str_table1 += ',' + str_table2 += ',' + str_value = str_value + ',' + str_table1 += ')' + str_table2 += ')' + str_value += ')' + print('') + + # CREATE TABLE + str_create = 'CREATE TABLE ' + str_table1 + ';' + print(str_create) + time_start = time.time() + cur.execute(str_create) + print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + + # LOAD DATA + str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';' + print(str_insert) + time_start = time.time() + cur.executemany(str_insert, to_db) + conn.commit() + print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + + # CREATE INDEX + array_idx = str_index.split(',') + for col_idx in array_idx: + if col_idx != '': + time_start = time.time() + if input_header == '1': + sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(' + header[int(col_idx) - 1] + ');' + else: + sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(c' + col_idx + ');' + cur.execute(sql_index) + print(sql_index) + print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + + cur.close() + +def execute(conn, str_select, output_file, output_header, explain_plan): + + cur = conn.cursor() + + # SELECT + time_start = time.time() + print(str_select) + cur.execute(str_select) + print('Elapsed Time: ' + str(time.time() - time_start) + '\n') + + # OUTPUT + out = open(output_file, 'w') + + # HEADER + if output_header == '1': + col_count = 0 + for col in cur.description: + col_count += 1 + if col_count != len(cur.description): + out.write(str(col[0]) + '\t') + else: + out.write(str(col[0]) + '\n') + + # CONTENT + for row in cur: + col_count = 0 + for col in row: + col_count += 1 + if col_count != len(row): + out.write(str(col) + '\t') + else: + out.write(str(col) + '\n') + + out.close() + + # EXPLAIN PLAN + if explain_plan == '1': + #conn.executescript('.explain on') + cur.execute('explain query plan ' + str_select) + for row in cur: + col_count = 0 + for col in row: + col_count += 1 + if col_count != len(row): + print(str(col) + '\t') + else: + print(str(col) + '\n') + + cur.close()
