Mercurial > repos > jjohnson > query_tabular
changeset 7:aa2409ae9dc0 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author | jjohnson |
---|---|
date | Tue, 16 Feb 2016 09:33:46 -0500 |
parents | b9a6b07d0d2e |
children | 6d9c91071884 |
files | query_tabular.py query_tabular.xml |
diffstat | 2 files changed, 42 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/query_tabular.py Fri Feb 12 08:37:42 2016 -0500 +++ b/query_tabular.py Tue Feb 16 09:33:46 2016 -0500 @@ -28,8 +28,10 @@ comment_lines : 1 }, { file_path : '/home/galaxy/dataset_102.dat', - table_name : 't2', - column_names : ['c1', 'c2', 'c3'] + table_name : 'gff', + column_names : ['seqname',,,'start','end'] + comment_lines : 1 + load_named_columns : True }, { file_path : '/home/galaxy/dataset_103.dat', table_name : 'test', @@ -58,10 +60,12 @@ def get_column_def(file_path, table_name, skip=0, comment_char='#', - column_names=None, max_lines=100): + column_names=None, max_lines=100,load_named_columns=False): col_pref = ['TEXT', 'REAL', 'INTEGER', None] col_types = [] + col_idx = None data_lines = 0 + try: with open(file_path, "r") as fh: for linenum, line in enumerate(fh): @@ -82,22 +86,33 @@ print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) except Exception, e: print >> sys.stderr, 'Failed: %s' % (e) - for i, col_type in enumerate(col_types): + for i,col_type in enumerate(col_types): if not col_type: col_types[i] = 'TEXT' - col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] - if column_names: - for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): - if cname and i < len(col_names): - col_names[i] = cname + if column_names: + col_names = [] + if load_named_columns: + col_idx = [] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname != '': + col_idx.append(i) + col_names.append(cname) + col_types = [col_types[i] for i in col_idx] + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname and i < len(col_names): + col_names[i] = cname + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] col_def = [] for i, col_name in enumerate(col_names): col_def.append('%s %s' % (col_names[i], col_types[i])) - return col_names, col_types, col_def + return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None): - col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names) +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): + col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) # print >> sys.stdout, table_def @@ -114,6 +129,8 @@ data_lines += 1 try: fields = line.rstrip('\r\n').split('\t') + if col_idx: + fields = [fields[i] for i in col_idx] vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] c.execute(insert_stmt, vals) except Exception, e: @@ -193,9 +210,13 @@ for ti, table in enumerate(tdef['tables']): path = table['file_path'] table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) + comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 column_names = table['column_names'] if 'column_names' in table else None - comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines) + if column_names: + load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False + else: + load_named_columns = False + create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close()
--- a/query_tabular.xml Fri Feb 12 08:37:42 2016 -0500 +++ b/query_tabular.xml Tue Feb 16 09:33:46 2016 -0500 @@ -1,4 +1,4 @@ -<tool id="query_tabular" name="Query Tabular" version="0.1.0"> +<tool id="query_tabular" name="Query Tabular" version="0.1.1"> <description>using sqlite sql</description> <requirements> @@ -14,21 +14,6 @@ -s $workdb #end if -j $table_json - #* - ## #for $i,$tbl in enumerate($tables): - ## #if $tbl.table_name - ## #set $tname = $tbl.table_name - ## #else - ## #set $tname = 't' + str($i + 1) - ## #end if - ## #if $tbl.col_names: - ## #set $col_names = ':' + str($tbl.col_names) - ## #else - ## #set $col_names = '' - ## #end if - ## -t ${tbl.table}=${tname}${$col_names} - ## #end for - *# #if $sqlquery: -Q "$query_file" $no_header @@ -55,6 +40,9 @@ #set $jtbl['table_name'] = $tname #if $tbl.col_names: #set $col_names = str($tbl.col_names) + #if $tbl.load_named_columns: + #set $jtbl['load_named_columns'] = True + #end if #else #set $col_names = '' #end if @@ -77,13 +65,12 @@ <help>By default, tables will be named: t1,t2,...,tn</help> <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> </param> - <!-- - <param name="sel_cols" label="Include columns" type="data_column" multiple="true" data_ref="table" /> - --> <param name="col_names" type="text" value="" optional="true" label="Column names"> <help>By default, table columns will be named: c1,c2,c3,...,cn</help> - <validator type="regex" message="A List of separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores">^([A-Za-z]\w*)?(,([A-Za-z]\w*)?)*$</validator> + <sanitizer sanitize="False"/> + <validator type="regex" message="A List of names separated by commas: Column names should start with a letter and may contain additional letters, digits, and underscores. Otherwise, the name must be eclosed in: double quotes, back quotes, or square brackets.">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> </param> + <param name="load_named_columns" type="boolean" truevalue="load_named_columns" falsevalue="" checked="false" label="Only load named columns into database"/> <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" /> </repeat> <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">