diff sqlite_lib.py @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sqlite_lib.py	Tue Dec 30 18:27:26 2014 +0900
@@ -0,0 +1,140 @@
+# -*- coding:utf-8 -*-
+
+"""
+sqlite_lib
+"""
+
+__author__ = 'Sem4j'
+__version__ = '0.0.2'
+__date__ = '13 Nov 2013'
+
+import csv, sqlite3, time
+
+def connect():
+    conn = sqlite3.connect(':memory:')
+    conn.row_factory = sqlite3.Row
+    conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
+    return conn
+
+def load(conn, table_name, input_file, input_header, str_index):
+
+    cur = conn.cursor()
+    
+    with open(input_file,'rb') as infile:
+        dr = csv.reader(infile, delimiter='\t')
+        to_db = []
+        row_count = 0
+        for row in dr:
+            row_count += 1
+            values =[]
+            col_count = 0
+            for col in row:
+                col_count += 1
+                values.append(col)
+            if input_header == '1' and row_count == 1:
+                header = values
+                print('Header:')
+            else:
+                to_db.append(values)
+
+    # PREPARE DDL&DML
+    str_table1 = table_name + '('
+    str_table2 = table_name + '('
+    str_value = 'VALUES ('
+    for j in range(col_count):
+        if input_header == '1':
+            print('  ' + str(j + 1) + ' ' + header[j]);
+            col_name = header[j]
+        else:
+            col_name = 'c' + str(j + 1)
+        str_table1 += col_name + ' NUMERIC'
+        str_table2 += col_name + ' '
+        str_value = str_value + '?'
+        if j != col_count - 1:
+            str_table1 += ','
+            str_table2 += ','
+            str_value = str_value + ','
+    str_table1 += ')'
+    str_table2 += ')'
+    str_value += ')'
+    print('')
+
+    # CREATE TABLE
+    str_create = 'CREATE TABLE ' + str_table1 + ';'
+    print(str_create)
+    time_start = time.time()
+    cur.execute(str_create)
+    print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
+
+    # LOAD DATA
+    str_insert = 'INSERT INTO ' + str_table2  + ' ' + str_value + ';'
+    print(str_insert)
+    time_start = time.time()
+    cur.executemany(str_insert, to_db)
+    conn.commit()
+    print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
+
+    # CREATE INDEX
+    array_idx = str_index.split(',')
+    for col_idx in array_idx:
+        if col_idx != '':
+            time_start = time.time()
+            if input_header == '1':
+                sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(' + header[int(col_idx) - 1] + ');'
+            else:
+                sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(c' + col_idx + ');'
+            cur.execute(sql_index)
+            print(sql_index)
+            print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
+    
+    cur.close()
+
+def execute(conn, str_select, output_file, output_header, explain_plan):
+    
+    cur = conn.cursor()
+    
+    # SELECT
+    time_start = time.time()
+    print(str_select)
+    cur.execute(str_select)
+    print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
+    
+    # OUTPUT
+    out = open(output_file, 'w')
+    
+    # HEADER
+    if output_header == '1':
+        col_count = 0
+        for col in cur.description:
+            col_count += 1
+            if col_count != len(cur.description):
+                out.write(str(col[0]) + '\t')
+            else:
+                out.write(str(col[0]) + '\n')
+    
+    # CONTENT
+    for row in cur:
+        col_count = 0
+        for col in row:
+            col_count += 1
+            if col_count != len(row):
+                out.write(str(col) + '\t')
+            else:
+                out.write(str(col) + '\n')
+    
+    out.close()
+    
+    # EXPLAIN PLAN
+    if explain_plan == '1':
+        #conn.executescript('.explain on')
+        cur.execute('explain query plan ' + str_select)
+        for row in cur:
+            col_count = 0
+            for col in row:
+                col_count += 1
+                if col_count != len(row):
+                    print(str(col) + '\t')
+                else:
+                    print(str(col) + '\n')
+    
+    cur.close()