Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 8:6d9c91071884 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author | jjohnson |
---|---|
date | Fri, 15 Apr 2016 16:29:51 -0400 |
parents | aa2409ae9dc0 |
children | 98bd1e29d669 |
comparison
equal
deleted
inserted
replaced
7:aa2409ae9dc0 | 8:6d9c91071884 |
---|---|
12 """ | 12 """ |
13 TODO: | 13 TODO: |
14 - could read column names from comment lines, but issues with legal names | 14 - could read column names from comment lines, but issues with legal names |
15 - could add some transformations on tabular columns, | 15 - could add some transformations on tabular columns, |
16 e.g. a regex to format date/time strings | 16 e.g. a regex to format date/time strings |
17 c2 : re.sub('pat', 'sub', c2) | 17 index: ['c2','c4,c5'] |
18 c3 : | 18 unique: ['c1'] |
19 format: { | |
20 c2 : re.sub('pat', 'sub', c2) | |
21 c3 : len(c3) | |
22 } | |
23 def format(colname,val, expr): | |
24 | |
25 - allow optional autoincrement id column - user supplied name? | |
26 autoincrement : 'id' | |
19 - column_defs dict of columns to create from tabular input | 27 - column_defs dict of columns to create from tabular input |
20 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} | 28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} |
21 - allow multiple queries and outputs | 29 - allow multiple queries and outputs |
22 - add a --json input for table definitions (or yaml) | 30 - add a --json input for table definitions (or yaml) |
23 JSON config: | 31 JSON config: |
109 for i, col_name in enumerate(col_names): | 117 for i, col_name in enumerate(col_names): |
110 col_def.append('%s %s' % (col_names[i], col_types[i])) | 118 col_def.append('%s %s' % (col_names[i], col_types[i])) |
111 return col_names, col_types, col_def, col_idx | 119 return col_names, col_types, col_def, col_idx |
112 | 120 |
113 | 121 |
114 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): | 122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): |
115 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) | 123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) |
116 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] | 124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] |
117 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) | 125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) |
118 # print >> sys.stdout, table_def | 126 # print >> sys.stdout, table_def |
119 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) | 127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) |
120 # print >> sys.stdout, insert_stmt | 128 # print >> sys.stdout, insert_stmt |
121 data_lines = 0 | 129 data_lines = 0 |
122 try: | 130 try: |
123 c = conn.cursor() | 131 c = conn.cursor() |
124 c.execute(table_def) | 132 c.execute(table_def) |
133 conn.commit() | |
134 c.close() | |
135 for i,index in enumerate(unique_indexes): | |
136 index_name='idx_uniq_%s_%d' % (table_name,i) | |
137 index_columns = index.split(',') | |
138 create_index(conn, table_name, index_name, index_columns,unique=True) | |
139 for i,index in enumerate(indexes): | |
140 index_name='idx_%s_%d' % (table_name,i) | |
141 index_columns = index.split(',') | |
142 create_index(conn, table_name, index_name, index_columns) | |
143 c = conn.cursor() | |
125 with open(file_path, "r") as fh: | 144 with open(file_path, "r") as fh: |
126 for linenum, line in enumerate(fh): | 145 for linenum, line in enumerate(fh): |
127 if linenum < skip or line.startswith(comment_char): | 146 if linenum < skip or line.startswith(comment_char): |
128 continue | 147 continue |
129 data_lines += 1 | 148 data_lines += 1 |
139 c.close() | 158 c.close() |
140 except Exception, e: | 159 except Exception, e: |
141 print >> sys.stderr, 'Failed: %s' % (e) | 160 print >> sys.stderr, 'Failed: %s' % (e) |
142 exit(1) | 161 exit(1) |
143 | 162 |
163 def create_index(conn, table_name, index_name, index_columns, unique=False): | |
164 index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns)) | |
165 c = conn.cursor() | |
166 c.execute(index_def) | |
167 conn.commit() | |
168 c.close() | |
144 | 169 |
145 def regex_match(expr, item): | 170 def regex_match(expr, item): |
146 return re.match(expr, item) is not None | 171 return re.match(expr, item) is not None |
147 | 172 |
148 | 173 |
214 column_names = table['column_names'] if 'column_names' in table else None | 239 column_names = table['column_names'] if 'column_names' in table else None |
215 if column_names: | 240 if column_names: |
216 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False | 241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False |
217 else: | 242 else: |
218 load_named_columns = False | 243 load_named_columns = False |
219 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) | 244 unique_indexes = table['unique'] if 'unique' in table else [] |
245 indexes = table['index'] if 'index' in table else [] | |
246 create_table(conn, path, table_name, column_names=column_names, | |
247 skip=comment_lines, load_named_columns=load_named_columns, | |
248 unique_indexes=unique_indexes, indexes=indexes) | |
220 except Exception, exc: | 249 except Exception, exc: |
221 print >> sys.stderr, "Error: %s" % exc | 250 print >> sys.stderr, "Error: %s" % exc |
222 conn.close() | 251 conn.close() |
223 | 252 |
224 query = None | 253 query = None |