annotate sqlite_lib.py @ 0:aa7a5cc0f59b default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:27:26 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
1 # -*- coding:utf-8 -*-
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
2
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
3 """
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
4 sqlite_lib
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
5 """
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
6
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
7 __author__ = 'Sem4j'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
8 __version__ = '0.0.2'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
9 __date__ = '13 Nov 2013'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
10
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
11 import csv, sqlite3, time
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
12
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
13 def connect():
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
14 conn = sqlite3.connect(':memory:')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
15 conn.row_factory = sqlite3.Row
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
16 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
17 return conn
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
18
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
19 def load(conn, table_name, input_file, input_header, str_index):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
20
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
21 cur = conn.cursor()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
22
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
23 with open(input_file,'rb') as infile:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
24 dr = csv.reader(infile, delimiter='\t')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
25 to_db = []
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
26 row_count = 0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
27 for row in dr:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
28 row_count += 1
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
29 values =[]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
30 col_count = 0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
31 for col in row:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
32 col_count += 1
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
33 values.append(col)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
34 if input_header == '1' and row_count == 1:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
35 header = values
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
36 print('Header:')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
37 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
38 to_db.append(values)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
39
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
40 # PREPARE DDL&DML
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
41 str_table1 = table_name + '('
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
42 str_table2 = table_name + '('
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
43 str_value = 'VALUES ('
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
44 for j in range(col_count):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
45 if input_header == '1':
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
46 print(' ' + str(j + 1) + ' ' + header[j]);
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
47 col_name = header[j]
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
48 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
49 col_name = 'c' + str(j + 1)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
50 str_table1 += col_name + ' NUMERIC'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
51 str_table2 += col_name + ' '
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
52 str_value = str_value + '?'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
53 if j != col_count - 1:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
54 str_table1 += ','
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
55 str_table2 += ','
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
56 str_value = str_value + ','
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
57 str_table1 += ')'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
58 str_table2 += ')'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
59 str_value += ')'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
60 print('')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
61
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
62 # CREATE TABLE
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
63 str_create = 'CREATE TABLE ' + str_table1 + ';'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
64 print(str_create)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
65 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
66 cur.execute(str_create)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
67 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
68
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
69 # LOAD DATA
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
70 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
71 print(str_insert)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
72 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
73 cur.executemany(str_insert, to_db)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
74 conn.commit()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
75 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
76
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
77 # CREATE INDEX
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
78 array_idx = str_index.split(',')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
79 for col_idx in array_idx:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
80 if col_idx != '':
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
81 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
82 if input_header == '1':
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
83 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(' + header[int(col_idx) - 1] + ');'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
84 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
85 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(c' + col_idx + ');'
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
86 cur.execute(sql_index)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
87 print(sql_index)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
88 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
89
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
90 cur.close()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
91
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
92 def execute(conn, str_select, output_file, output_header, explain_plan):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
93
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
94 cur = conn.cursor()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
95
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
96 # SELECT
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
97 time_start = time.time()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
98 print(str_select)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
99 cur.execute(str_select)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
100 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
101
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
102 # OUTPUT
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
103 out = open(output_file, 'w')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
104
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
105 # HEADER
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
106 if output_header == '1':
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
107 col_count = 0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
108 for col in cur.description:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
109 col_count += 1
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
110 if col_count != len(cur.description):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
111 out.write(str(col[0]) + '\t')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
112 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
113 out.write(str(col[0]) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
114
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
115 # CONTENT
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
116 for row in cur:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
117 col_count = 0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
118 for col in row:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
119 col_count += 1
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
120 if col_count != len(row):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
121 out.write(str(col) + '\t')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
122 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
123 out.write(str(col) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
124
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
125 out.close()
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
126
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
127 # EXPLAIN PLAN
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
128 if explain_plan == '1':
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
129 #conn.executescript('.explain on')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
130 cur.execute('explain query plan ' + str_select)
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
131 for row in cur:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
132 col_count = 0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
133 for col in row:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
134 col_count += 1
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
135 if col_count != len(row):
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
136 print(str(col) + '\t')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
137 else:
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
138 print(str(col) + '\n')
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
139
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff changeset
140 cur.close()