comparison query_tabular.py @ 7:aa2409ae9dc0 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author jjohnson
date Tue, 16 Feb 2016 09:33:46 -0500
parents afdbc7198353
children 6d9c91071884
comparison
equal deleted inserted replaced
6:b9a6b07d0d2e 7:aa2409ae9dc0
26 table_name : 't1', 26 table_name : 't1',
27 column_names : ['c1', 'c2', 'c3'], 27 column_names : ['c1', 'c2', 'c3'],
28 comment_lines : 1 28 comment_lines : 1
29 }, 29 },
30 { file_path : '/home/galaxy/dataset_102.dat', 30 { file_path : '/home/galaxy/dataset_102.dat',
31 table_name : 't2', 31 table_name : 'gff',
32 column_names : ['c1', 'c2', 'c3'] 32 column_names : ['seqname',,,'start','end']
33 comment_lines : 1
34 load_named_columns : True
33 }, 35 },
34 { file_path : '/home/galaxy/dataset_103.dat', 36 { file_path : '/home/galaxy/dataset_103.dat',
35 table_name : 'test', 37 table_name : 'test',
36 column_names : ['c1', 'c2', 'c3'] 38 column_names : ['c1', 'c2', 'c3']
37 } 39 }
56 return 'TEXT' 58 return 'TEXT'
57 return None 59 return None
58 60
59 61
60 def get_column_def(file_path, table_name, skip=0, comment_char='#', 62 def get_column_def(file_path, table_name, skip=0, comment_char='#',
61 column_names=None, max_lines=100): 63 column_names=None, max_lines=100,load_named_columns=False):
62 col_pref = ['TEXT', 'REAL', 'INTEGER', None] 64 col_pref = ['TEXT', 'REAL', 'INTEGER', None]
63 col_types = [] 65 col_types = []
66 col_idx = None
64 data_lines = 0 67 data_lines = 0
68
65 try: 69 try:
66 with open(file_path, "r") as fh: 70 with open(file_path, "r") as fh:
67 for linenum, line in enumerate(fh): 71 for linenum, line in enumerate(fh):
68 if linenum < skip: 72 if linenum < skip:
69 continue 73 continue
80 col_types[i] = colType 84 col_types[i] = colType
81 except Exception, e: 85 except Exception, e:
82 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) 86 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e)
83 except Exception, e: 87 except Exception, e:
84 print >> sys.stderr, 'Failed: %s' % (e) 88 print >> sys.stderr, 'Failed: %s' % (e)
85 for i, col_type in enumerate(col_types): 89 for i,col_type in enumerate(col_types):
86 if not col_type: 90 if not col_type:
87 col_types[i] = 'TEXT' 91 col_types[i] = 'TEXT'
88 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] 92 if column_names:
89 if column_names: 93 col_names = []
90 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): 94 if load_named_columns:
91 if cname and i < len(col_names): 95 col_idx = []
92 col_names[i] = cname 96 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
97 if cname != '':
98 col_idx.append(i)
99 col_names.append(cname)
100 col_types = [col_types[i] for i in col_idx]
101 else:
102 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
103 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]):
104 if cname and i < len(col_names):
105 col_names[i] = cname
106 else:
107 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
93 col_def = [] 108 col_def = []
94 for i, col_name in enumerate(col_names): 109 for i, col_name in enumerate(col_names):
95 col_def.append('%s %s' % (col_names[i], col_types[i])) 110 col_def.append('%s %s' % (col_names[i], col_types[i]))
96 return col_names, col_types, col_def 111 return col_names, col_types, col_def, col_idx
97 112
98 113
99 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None): 114 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False):
100 col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names) 115 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
101 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] 116 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
102 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) 117 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def))
103 # print >> sys.stdout, table_def 118 # print >> sys.stdout, table_def
104 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) 119 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names]))
105 # print >> sys.stdout, insert_stmt 120 # print >> sys.stdout, insert_stmt
112 if linenum < skip or line.startswith(comment_char): 127 if linenum < skip or line.startswith(comment_char):
113 continue 128 continue
114 data_lines += 1 129 data_lines += 1
115 try: 130 try:
116 fields = line.rstrip('\r\n').split('\t') 131 fields = line.rstrip('\r\n').split('\t')
132 if col_idx:
133 fields = [fields[i] for i in col_idx]
117 vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] 134 vals = [col_func[i](x) if x else None for i, x in enumerate(fields)]
118 c.execute(insert_stmt, vals) 135 c.execute(insert_stmt, vals)
119 except Exception, e: 136 except Exception, e:
120 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) 137 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e)
121 conn.commit() 138 conn.commit()
191 tdef = json.load(fh) 208 tdef = json.load(fh)
192 if 'tables' in tdef: 209 if 'tables' in tdef:
193 for ti, table in enumerate(tdef['tables']): 210 for ti, table in enumerate(tdef['tables']):
194 path = table['file_path'] 211 path = table['file_path']
195 table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) 212 table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1)
213 comment_lines = table['comment_lines'] if 'comment_lines' in table else 0
196 column_names = table['column_names'] if 'column_names' in table else None 214 column_names = table['column_names'] if 'column_names' in table else None
197 comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 215 if column_names:
198 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines) 216 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
217 else:
218 load_named_columns = False
219 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns)
199 except Exception, exc: 220 except Exception, exc:
200 print >> sys.stderr, "Error: %s" % exc 221 print >> sys.stderr, "Error: %s" % exc
201 conn.close() 222 conn.close()
202 223
203 query = None 224 query = None