0
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
1 # -*- coding:utf-8 -*-
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
2
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
3 """
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
4 sqlite_lib
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
5 """
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
6
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
7 __author__ = 'Sem4j'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
8 __version__ = '0.0.2'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
9 __date__ = '13 Nov 2013'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
10
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
11 import csv, sqlite3, time
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
12
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
13 def connect():
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
14 conn = sqlite3.connect(':memory:')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
15 conn.row_factory = sqlite3.Row
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
16 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
17 return conn
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
18
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
19 def load(conn, table_name, input_file, input_header, str_index):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
20
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
21 cur = conn.cursor()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
22
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
23 with open(input_file,'rb') as infile:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
24 dr = csv.reader(infile, delimiter='\t')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
25 to_db = []
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
26 row_count = 0
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
27 for row in dr:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
28 row_count += 1
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
29 values =[]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
30 col_count = 0
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
31 for col in row:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
32 col_count += 1
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
33 values.append(col)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
34 if input_header == '1' and row_count == 1:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
35 header = values
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
36 print('Header:')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
37 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
38 to_db.append(values)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
39
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
40 # PREPARE DDL&DML
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
41 str_table1 = table_name + '('
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
42 str_table2 = table_name + '('
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
43 str_value = 'VALUES ('
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
44 for j in range(col_count):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
45 if input_header == '1':
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
46 print(' ' + str(j + 1) + ' ' + header[j]);
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
47 col_name = header[j]
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
48 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
49 col_name = 'c' + str(j + 1)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
50 str_table1 += col_name + ' NUMERIC'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
51 str_table2 += col_name + ' '
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
52 str_value = str_value + '?'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
53 if j != col_count - 1:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
54 str_table1 += ','
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
55 str_table2 += ','
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
56 str_value = str_value + ','
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
57 str_table1 += ')'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
58 str_table2 += ')'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
59 str_value += ')'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
60 print('')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
61
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
62 # CREATE TABLE
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
63 str_create = 'CREATE TABLE ' + str_table1 + ';'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
64 print(str_create)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
65 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
66 cur.execute(str_create)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
67 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
68
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
69 # LOAD DATA
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
70 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
71 print(str_insert)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
72 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
73 cur.executemany(str_insert, to_db)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
74 conn.commit()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
75 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
76
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
77 # CREATE INDEX
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
78 array_idx = str_index.split(',')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
79 for col_idx in array_idx:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
80 if col_idx != '':
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
81 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
82 if input_header == '1':
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
83 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(' + header[int(col_idx) - 1] + ');'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
84 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
85 sql_index = 'CREATE INDEX idx_' + table_name +'_c' + col_idx + ' on ' + table_name + '(c' + col_idx + ');'
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
86 cur.execute(sql_index)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
87 print(sql_index)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
88 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
89
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
90 cur.close()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
91
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
92 def execute(conn, str_select, output_file, output_header, explain_plan):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
93
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
94 cur = conn.cursor()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
95
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
96 # SELECT
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
97 time_start = time.time()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
98 print(str_select)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
99 cur.execute(str_select)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
100 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
101
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
102 # OUTPUT
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
103 out = open(output_file, 'w')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
104
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
105 # HEADER
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
106 if output_header == '1':
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
107 col_count = 0
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
108 for col in cur.description:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
109 col_count += 1
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
110 if col_count != len(cur.description):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
111 out.write(str(col[0]) + '\t')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
112 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
113 out.write(str(col[0]) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
114
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
115 # CONTENT
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
116 for row in cur:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
117 col_count = 0
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
118 for col in row:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
119 col_count += 1
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
120 if col_count != len(row):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
121 out.write(str(col) + '\t')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
122 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
123 out.write(str(col) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
124
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
125 out.close()
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
126
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
127 # EXPLAIN PLAN
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
128 if explain_plan == '1':
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
129 #conn.executescript('.explain on')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
130 cur.execute('explain query plan ' + str_select)
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
131 for row in cur:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
132 col_count = 0
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
133 for col in row:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
134 col_count += 1
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
135 if col_count != len(row):
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
136 print(str(col) + '\t')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
137 else:
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
138 print(str(col) + '\n')
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
139
|
ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
parents:
diff
changeset
|
140 cur.close()
|