# HG changeset patch # User jjohnson # Date 1455633226 18000 # Node ID aa2409ae9dc0d6fc8babf32b99735e629ebb0052 # Parent b9a6b07d0d2e6dede0b5955410edf53432059ba4 planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty diff -r b9a6b07d0d2e -r aa2409ae9dc0 query_tabular.py --- a/query_tabular.py Fri Feb 12 08:37:42 2016 -0500 +++ b/query_tabular.py Tue Feb 16 09:33:46 2016 -0500 @@ -28,8 +28,10 @@ comment_lines : 1 }, { file_path : '/home/galaxy/dataset_102.dat', - table_name : 't2', - column_names : ['c1', 'c2', 'c3'] + table_name : 'gff', + column_names : ['seqname',,,'start','end'] + comment_lines : 1 + load_named_columns : True }, { file_path : '/home/galaxy/dataset_103.dat', table_name : 'test', @@ -58,10 +60,12 @@ def get_column_def(file_path, table_name, skip=0, comment_char='#', - column_names=None, max_lines=100): + column_names=None, max_lines=100,load_named_columns=False): col_pref = ['TEXT', 'REAL', 'INTEGER', None] col_types = [] + col_idx = None data_lines = 0 + try: with open(file_path, "r") as fh: for linenum, line in enumerate(fh): @@ -82,22 +86,33 @@ print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) except Exception, e: print >> sys.stderr, 'Failed: %s' % (e) - for i, col_type in enumerate(col_types): + for i,col_type in enumerate(col_types): if not col_type: col_types[i] = 'TEXT' - col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] - if column_names: - for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): - if cname and i < len(col_names): - col_names[i] = cname + if column_names: + col_names = [] + if load_named_columns: + col_idx = [] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname != '': + col_idx.append(i) + col_names.append(cname) + col_types = [col_types[i] for i in col_idx] + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname and i < len(col_names): + col_names[i] = cname + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] col_def = [] for i, col_name in enumerate(col_names): col_def.append('%s %s' % (col_names[i], col_types[i])) - return col_names, col_types, col_def + return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None): - col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names) +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): + col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) # print >> sys.stdout, table_def @@ -114,6 +129,8 @@ data_lines += 1 try: fields = line.rstrip('\r\n').split('\t') + if col_idx: + fields = [fields[i] for i in col_idx] vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] c.execute(insert_stmt, vals) except Exception, e: @@ -193,9 +210,13 @@ for ti, table in enumerate(tdef['tables']): path = table['file_path'] table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) + comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 column_names = table['column_names'] if 'column_names' in table else None - comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines) + if column_names: + load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False + else: + load_named_columns = False + create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close() diff -r b9a6b07d0d2e -r aa2409ae9dc0 query_tabular.xml --- a/query_tabular.xml Fri Feb 12 08:37:42 2016 -0500 +++ b/query_tabular.xml Tue Feb 16 09:33:46 2016 -0500 @@ -1,4 +1,4 @@ - + using sqlite sql @@ -14,21 +14,6 @@ -s $workdb #end if -j $table_json - #* - ## #for $i,$tbl in enumerate($tables): - ## #if $tbl.table_name - ## #set $tname = $tbl.table_name - ## #else - ## #set $tname = 't' + str($i + 1) - ## #end if - ## #if $tbl.col_names: - ## #set $col_names = ':' + str($tbl.col_names) - ## #else - ## #set $col_names = '' - ## #end if - ## -t ${tbl.table}=${tname}${$col_names} - ## #end for - *# #if $sqlquery: -Q "$query_file" $no_header @@ -55,6 +40,9 @@ #set $jtbl['table_name'] = $tname #if $tbl.col_names: #set $col_names = str($tbl.col_names) + #if $tbl.load_named_columns: + #set $jtbl['load_named_columns'] = True + #end if #else #set $col_names = '' #end if @@ -77,13 +65,12 @@ By default, tables will be named: t1,t2,...,tn ^[A-Za-z]\w*$ - By default, table columns will be named: c1,c2,c3,...,cn - ^([A-Za-z]\w*)?(,([A-Za-z]\w*)?)*$ + + ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ +