comparison sqlite_lib.py @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:aa7a5cc0f59b
1 # -*- coding:utf-8 -*-
2
3 """
4 sqlite_lib
5 """
6
7 __author__ = 'Sem4j'
8 __version__ = '0.0.2'
9 __date__ = '13 Nov 2013'
10
11 import csv, sqlite3, time
12
13 def connect():
14 conn = sqlite3.connect(':memory:')
15 conn.row_factory = sqlite3.Row
16 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
17 return conn
18
19 def load(conn, table_name, input_file, input_header, str_index):
20
21 cur = conn.cursor()
22
23 with open(input_file,'rb') as infile:
24 dr = csv.reader(infile, delimiter='\t')
25 to_db = []
26 row_count = 0
27 for row in dr:
28 row_count += 1
29 values =[]
30 col_count = 0
31 for col in row:
32 col_count += 1
33 values.append(col)
34 if input_header == '1' and row_count == 1:
35 header = values
36 print('Header:')
37 else:
38 to_db.append(values)
39
40 # PREPARE DDL&DML
41 str_table1 = table_name + '('
42 str_table2 = table_name + '('
43 str_value = 'VALUES ('
44 for j in range(col_count):
45 if input_header == '1':
46 print(' ' + str(j + 1) + ' ' + header[j]);
47 col_name = header[j]
48 else:
49 col_name = 'c' + str(j + 1)
50 str_table1 += col_name + ' NUMERIC'
51 str_table2 += col_name + ' '
52 str_value = str_value + '?'
53 if j != col_count - 1:
54 str_table1 += ','
55 str_table2 += ','
56 str_value = str_value + ','
57 str_table1 += ')'
58 str_table2 += ')'
59 str_value += ')'
60 print('')
61
62 # CREATE TABLE
63 str_create = 'CREATE TABLE ' + str_table1 + ';'
64 print(str_create)
65 time_start = time.time()
66 cur.execute(str_create)
67 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
68
69 # LOAD DATA
70 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
71 print(str_insert)
72 time_start = time.time()
73 cur.executemany(str_insert, to_db)
74 conn.commit()
75 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
76
77 # CREATE INDEX
78 array_idx = str_index.split(',')
79 for col_idx in array_idx:
80 if col_idx != '':
81 time_start = time.time()
82 if input_header == '1':
83 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(' + header[int(col_idx) - 1] + ');'
84 else:
85 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(c' + col_idx + ');'
86 cur.execute(sql_index)
87 print(sql_index)
88 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
89
90 cur.close()
91
92 def execute(conn, str_select, output_file, output_header, explain_plan):
93
94 cur = conn.cursor()
95
96 # SELECT
97 time_start = time.time()
98 print(str_select)
99 cur.execute(str_select)
100 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
101
102 # OUTPUT
103 out = open(output_file, 'w')
104
105 # HEADER
106 if output_header == '1':
107 col_count = 0
108 for col in cur.description:
109 col_count += 1
110 if col_count != len(cur.description):
111 out.write(str(col[0]) + '\t')
112 else:
113 out.write(str(col[0]) + '\n')
114
115 # CONTENT
116 for row in cur:
117 col_count = 0
118 for col in row:
119 col_count += 1
120 if col_count != len(row):
121 out.write(str(col) + '\t')
122 else:
123 out.write(str(col) + '\n')
124
125 out.close()
126
127 # EXPLAIN PLAN
128 if explain_plan == '1':
129 #conn.executescript('.explain on')
130 cur.execute('explain query plan ' + str_select)
131 for row in cur:
132 col_count = 0
133 for col in row:
134 col_count += 1
135 if col_count != len(row):
136 print(str(col) + '\t')
137 else:
138 print(str(col) + '\n')
139
140 cur.close()