Mercurial > repos > devteam > column_maker
changeset 7:e7c273e8d4d6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit 35c79ccee143e8d178ebd24f6317888de3ca0187"
author | devteam |
---|---|
date | Mon, 25 Jan 2021 09:59:49 +0000 |
parents | 0aeda7a81b46 |
children | 227e82286a0e |
files | column_maker.py column_maker.xml |
diffstat | 2 files changed, 66 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/column_maker.py Wed Dec 30 00:49:52 2020 +0000 +++ b/column_maker.py Mon Jan 25 09:59:49 2021 +0000 @@ -9,17 +9,22 @@ import argparse import json import re +import sys parser = argparse.ArgumentParser() parser.add_argument('input', type=argparse.FileType('r'), help="input file") parser.add_argument('output', type=argparse.FileType('wt'), help="output file") parser.add_argument('cond', nargs='?', type=str, help="expression") -parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'], - help="round result") parser.add_argument('columns', nargs='?', type=int, help="number of columns") parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") -parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'], +parser.add_argument('--round', action="store_true", + help="round result") +parser.add_argument('--avoid_scientific_notation', action="store_true", help="avoid scientific notation") +parser.add_argument('--header_new_column_name', default=None, type=str, + help="First line of input is a header line with column " + "names and this should become the name of the new " + "column") parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), help="overwrite parsed arguments from json file") args = parser.parse_args() @@ -33,42 +38,40 @@ out = argparse_dict['output'] expr = argparse_dict['cond'] round_result = argparse_dict['round'] +avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] + +if argparse_dict['header_new_column_name'] is not None: + header_line = fh.readline().strip('\n') + out.write( + '{0}\t{1}\n'.format( + header_line, argparse_dict['header_new_column_name'] + ) + ) try: in_columns = int(argparse_dict['columns']) + if in_columns < 2: + # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. 
+ raise ValueError except Exception: - exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") -if in_columns < 2: - # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. - exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") + if not fh.readline(): + # empty file content is ok and should produce empty output + out.close() + sys.exit() + sys.exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") try: in_column_types = argparse_dict['column_types'].split(',') except Exception: - exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") + sys.exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") if len(in_column_types) != in_columns: - exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. 
This tool can only be used with tab-delimited data.") -avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] - -# Unescape if input has been escaped -mapped_str = { - '__lt__': '<', - '__le__': '<=', - '__eq__': '==', - '__ne__': '!=', - '__gt__': '>', - '__ge__': '>=', - '__sq__': '\'', - '__dq__': '"', -} -for key, value in mapped_str.items(): - expr = expr.replace(key, value) + sys.exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") operators = 'is|not|or|and' builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! 
]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) if not re.compile(whitelist).match(expr): - exit("Invalid expression") -if avoid_scientific_notation == "yes": + sys.exit("Invalid expression") +if avoid_scientific_notation: expr = "format_float_positional(%s)" % expr # Prepare the column variable names and wrappers for column data types @@ -77,7 +80,7 @@ col_name = "c%d" % col cols.append(col_name) col_type = in_column_types[col - 1].strip() - if round_result == 'no' and col_type == 'int': + if not round_result and col_type == 'int': col_type = 'float' type_cast = "%s(%s)" % (col_type, col_name) type_casts.append(type_cast) @@ -118,7 +121,7 @@ %s %s new_val = %s - if round_result == "yes": + if round_result: new_val = int(round(new_val)) new_line = line + '\\t' + str(new_val) + "\\n" out.write(new_line) @@ -138,9 +141,9 @@ out.close() if str(e).startswith('invalid syntax'): valid_expr = False - exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) + sys.exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) else: - exit(str(e)) + sys.exit(str(e)) if valid_expr: out.close()
--- a/column_maker.xml Wed Dec 30 00:49:52 2020 +0000 +++ b/column_maker.xml Mon Jan 25 09:59:49 2021 +0000 @@ -1,45 +1,52 @@ -<tool id="Add_a_column1" name="Compute" version="1.4"> +<tool id="Add_a_column1" name="Compute" version="1.5"> <description>an expression on every row</description> <requirements> <requirement type="package" version="3.8">python</requirement> - <requirement type="package" version="4.4">sed</requirement> <requirement type="package" version="1.19.1">numpy</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ - #if $header_lines_conditional.header_lines_select == "yes": - (sed -n '1,1p' '$input' | sed "s|$|%${header_lines_conditional.header_new_column_name}|" | tr "%" "\t") > header && - sed '1,1d' '$input' > data && - #else: - touch header && - ln -s '$input' data && - #end if + ln -s '$input' data && ## inject colums and column_types metadata into inputs json #import json #set inputs_dict = json.load(open($inputs)) #set inputs_dict['columns'] = $input.metadata.columns #set inputs_dict['column_types'] = $input.metadata.column_types + ## flatten conditional + #if $header_lines_conditional.header_lines_select == "yes": + #set inputs_dict['header_new_column_name'] = str($header_lines_conditional.header_new_column_name) + #end if #set x = json.dump($inputs_dict, open($inputs, 'w')) python '$__tool_directory__/column_maker.py' - data column_maker_output + data '$out_file1' --load_json '$inputs' - && cat header column_maker_output > '$out_file1' ]]></command> <configfiles> <inputs name="inputs"/> </configfiles> <inputs> - <param name="cond" type="text" value="c3-c2" label="Add expression"/> + <param name="cond" type="text" value="c3-c2" label="Add expression"> + <sanitizer> + <valid initial="default"> + <add value="&lt;" /> + <add value="&gt;" /> + <add value="&quot;" /> + <add value="&apos;" /> + </valid> + </sanitizer> + </param> <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? 
See TIP below"/> - <param name="round" type="select" label="Round result?"> - <option value="no">NO</option> - <option value="yes">YES</option> - </param> + <param name="round" type="boolean" truevalue="yes" falsevalue="no" label="Round result?" /> + <param name="avoid_scientific_notation" type="boolean" truevalue="yes" falsevalue="no" + label="Avoid scientific notation" + help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers)." /> <conditional name="header_lines_conditional"> - <param name="header_lines_select" type="select" label="Skip a header line" help="# characters are already considered as comments and kept" > - <option value="no" >no</option> - <option value="yes" >yes</option> + <param name="header_lines_select" type="select" + label="Input has a header line with column names?" + help="Select Yes to be able to specify a name for the new column and have it added to the header line. If you select No, the first line will be treated as a regular line: If it is empty or starts with a # character it will be skipped, otherwise the tool will attempt to compute the specified expression on it." 
> + <option value="no" >No</option> + <option value="yes" >Yes</option> </param> <when value="no"> </when> @@ -47,10 +54,6 @@ <param name="header_new_column_name" type="text" value="New Column" label="The new column name" /> </when> </conditional> - <param name="avoid_scientific_notation" type="select" label="Avoid scientific notation" help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers)."> - <option value="no">no</option> - <option value="yes">yes</option> - </param> </inputs> <outputs> <data format_source="input" name="out_file1" metadata_source="input"/> @@ -59,19 +62,19 @@ <test> <param name="cond" value="c3-c2"/> <param name="input" value="1.bed"/> - <param name="round" value="no"/> + <param name="round" value="false"/> <output name="out_file1" file="column_maker_out1.interval"/> </test> <test> <param name="cond" value="c4*1"/> <param name="input" value="1.interval"/> - <param name="round" value="no"/> + <param name="round" value="false"/> <output name="out_file1" file="column_maker_out2.interval"/> </test> <test> <param name="cond" value="c4*1"/> <param name="input" value="1.header.tsv"/> - <param name="round" value="no"/> + <param name="round" value="false"/> <conditional name="header_lines_conditional"> <param name="header_lines_select" value="yes" /> <param name="header_new_column_name" value="value1_again" /> @@ -81,13 +84,13 @@ <test> <param name="cond" value="c4*1"/> <param name="input" value="1.interval"/> - <param name="round" value="yes"/> + <param name="round" value="true"/> <output name="out_file1" file="column_maker_out3.interval"/> </test> <test> <param name="cond" value="float(.0000000000001)"/> <param name="input" value="1.bed"/> - <param name="round" value="no"/> + <param name="round" value="false"/> <output name="out_file1"> <assert_contents> <has_text text="CCDS10397" /> @@ -98,8 +101,8 @@ <test> <param name="cond" value="float(.0000000000001)"/> <param 
name="input" value="1.bed"/> - <param name="round" value="no"/> - <param name="avoid_scientific_notation" value="yes"/> + <param name="round" value="false"/> + <param name="avoid_scientific_notation" value="true"/> <output name="out_file1"> <assert_contents> <has_text text="CCDS10397" />