Mercurial > repos > jjohnson > query_tabular
changeset 8:6d9c91071884 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author | jjohnson |
---|---|
date | Fri, 15 Apr 2016 16:29:51 -0400 |
parents | aa2409ae9dc0 |
children | b7f149b4792f |
files | query_tabular.py query_tabular.xml |
diffstat | 2 files changed, 81 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/query_tabular.py Tue Feb 16 09:33:46 2016 -0500 +++ b/query_tabular.py Fri Apr 15 16:29:51 2016 -0400 @@ -14,8 +14,16 @@ - could read column names from comment lines, but issues with legal names - could add some transformations on tabular columns, e.g. a regex to format date/time strings - c2 : re.sub('pat', 'sub', c2) - c3 : + index: ['c2','c4,c5'] + unique: ['c1'] + format: { + c2 : re.sub('pat', 'sub', c2) + c3 : len(c3) + } + def format(colname,val, expr): + +- allow optional autoincrement id column - user supplied name? + autoincrement : 'id' - column_defs dict of columns to create from tabular input column_defs : { 'name1' : 'expr', 'name2' : 'expr'} - allow multiple queries and outputs @@ -111,7 +119,7 @@ return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) @@ -122,6 +130,17 @@ try: c = conn.cursor() c.execute(table_def) + conn.commit() + c.close() + for i,index in enumerate(unique_indexes): + index_name='idx_uniq_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns,unique=True) + for i,index in enumerate(indexes): + index_name='idx_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns) + c = conn.cursor() with open(file_path, "r") as fh: for linenum, line in enumerate(fh): if linenum < skip or line.startswith(comment_char): @@ -141,6 +160,12 @@ print >> sys.stderr, 'Failed: %s' % (e) exit(1) +def create_index(conn, table_name, index_name, index_columns, unique=False): + index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns)) + c = conn.cursor() + c.execute(index_def) + conn.commit() + c.close() def regex_match(expr, item): return re.match(expr, item) is not None @@ -216,7 +241,11 @@ load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False else: load_named_columns = False - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) + unique_indexes = table['unique'] if 'unique' in table else [] + indexes = table['index'] if 'index' in table else [] + create_table(conn, path, table_name, column_names=column_names, + skip=comment_lines, load_named_columns=load_named_columns, + unique_indexes=unique_indexes, indexes=indexes) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close()
--- a/query_tabular.xml Tue Feb 16 09:33:46 2016 -0500 +++ b/query_tabular.xml Fri Apr 15 16:29:51 2016 -0400 @@ -1,4 +1,4 @@ -<tool id="query_tabular" name="Query Tabular" version="0.1.1"> +<tool id="query_tabular" name="Query Tabular" version="0.1.2"> <description>using sqlite sql</description> <requirements> @@ -32,26 +32,44 @@ #for $i,$tbl in enumerate($tables): #set $jtbl = dict() #set $jtbl['file_path'] = str($tbl.table) - #if $tbl.table_name - #set $tname = str($tbl.table_name) + #if $tbl.tbl_opts.table_name: + #set $tname = str($tbl.tbl_opts.table_name) #else #set $tname = 't' + str($i + 1) #end if #set $jtbl['table_name'] = $tname - #if $tbl.col_names: - #set $col_names = str($tbl.col_names) - #if $tbl.load_named_columns: + ## #if $tbl.tbl_opts.sel_cols: + ## #set $jtbl['sel_cols'] = $tbl.tbl_opts.sel_cols el_cols + ## #end if + #if $tbl.tbl_opts.col_names: + #set $col_names = str($tbl.tbl_opts.col_names) + #if $tbl.tbl_opts.load_named_columns: #set $jtbl['load_named_columns'] = True #end if #else #set $col_names = '' #end if #set $jtbl['column_names'] = $col_names - #if str($tbl.skip_lines) != '': - #set $jtbl['comment_lines'] = int($tbl.skip_lines) - #elif $tbl.table.metadata.comment_lines > 0: + #if str($tbl.tbl_opts.skip_lines) != '': + #set $jtbl['comment_lines'] = int($tbl.tbl_opts.skip_lines) + #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines) #end if + #set $idx_unique = [] + #set $idx_non = [] + #for $idx in $tbl.tbl_opts.indexes: + #if $idx.unique: + #silent $idx_unique.append(str($idx.index_columns)) + #else: + #silent $idx_non.append(str($idx.index_columns)) + #end if + #end for + #if len($idx_unique) > 0: + #set $jtbl['unique'] = $idx_unique + #end if + #if len($idx_non) > 0: + #set $jtbl['index'] = $idx_unique + #end if #set $jtbls += [$jtbl] #end for #echo $json.dumps($jtbldef) @@ -61,22 +79,31 @@ <param name="workdb" type="hidden" value="workdb.sqlite" label=""/> <repeat name="tables" title="Add tables" min="1"> <param name="table" type="data" format="tabular" label="Dataset"/> - <param name="table_name" type="text" value="" optional="true" label="Table name"> - <help>By default, tables will be named: t1,t2,...,tn</help> - <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> - </param> - <param name="col_names" type="text" value="" optional="true" label="Column names"> - <help>By default, table columns will be named: c1,c2,c3,...,cn</help> - <sanitizer sanitize="False"/> - <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> - </param> - <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/> - <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" /> + <section name="tbl_opts" expanded="false" title="Table Options"> + <param name="table_name" type="text" value="" optional="true" label="Table name"> + <help>By default, tables will be named: t1,t2,...,tn</help> + <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> + </param> + <param name="col_names" type="text" value="" optional="true" label="Column names"> + <help>By default, table columns will be named: c1,c2,c3,...,cn</help> + <sanitizer sanitize="False"/> + <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> + </param> + <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/> + <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" /> + <repeat name="indexes"> + <param name="unique" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="This is a unique index"/> + <param name="index_columns" type="text" value="" label="Index on Columns"> + <help>Create an index on the column names: e,g, c1 or c2,c4</help> + <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> + </param> + </repeat> + </section> </repeat> <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query"> <help>By default, tables will be named: t1,t2,...,tn</help> <sanitizer sanitize="False"/> - <validator type="regex" message="">^(?im)\s*select\s+.*\s+from\s+.*$</validator> + <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator> </param> <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>