Mercurial > repos > devteam > column_maker
diff column_maker.py @ 7:e7c273e8d4d6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit 35c79ccee143e8d178ebd24f6317888de3ca0187"
author | devteam |
---|---|
date | Mon, 25 Jan 2021 09:59:49 +0000 |
parents | 0aeda7a81b46 |
children | 227e82286a0e |
line wrap: on
line diff
--- a/column_maker.py Wed Dec 30 00:49:52 2020 +0000 +++ b/column_maker.py Mon Jan 25 09:59:49 2021 +0000 @@ -9,17 +9,22 @@ import argparse import json import re +import sys parser = argparse.ArgumentParser() parser.add_argument('input', type=argparse.FileType('r'), help="input file") parser.add_argument('output', type=argparse.FileType('wt'), help="output file") parser.add_argument('cond', nargs='?', type=str, help="expression") -parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'], - help="round result") parser.add_argument('columns', nargs='?', type=int, help="number of columns") parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") -parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'], +parser.add_argument('--round', action="store_true", + help="round result") +parser.add_argument('--avoid_scientific_notation', action="store_true", help="avoid scientific notation") +parser.add_argument('--header_new_column_name', default=None, type=str, + help="First line of input is a header line with column " + "names and this should become the name of the new " + "column") parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), help="overwrite parsed arguments from json file") args = parser.parse_args() @@ -33,42 +38,40 @@ out = argparse_dict['output'] expr = argparse_dict['cond'] round_result = argparse_dict['round'] +avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] + +if argparse_dict['header_new_column_name'] is not None: + header_line = fh.readline().strip('\n') + out.write( + '{0}\t{1}\n'.format( + header_line, argparse_dict['header_new_column_name'] + ) + ) try: in_columns = int(argparse_dict['columns']) + if in_columns < 2: + # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. + raise ValueError except Exception: - exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") -if in_columns < 2: - # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. - exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") + if not fh.readline(): + # empty file content is ok and should produce empty output + out.close() + sys.exit() + sys.exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") try: in_column_types = argparse_dict['column_types'].split(',') except Exception: - exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") + sys.exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") if len(in_column_types) != in_columns: - exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") -avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] - -# Unescape if input has been escaped -mapped_str = { - '__lt__': '<', - '__le__': '<=', - '__eq__': '==', - '__ne__': '!=', - '__gt__': '>', - '__ge__': '>=', - '__sq__': '\'', - '__dq__': '"', -} -for key, value in mapped_str.items(): - expr = expr.replace(key, value) + sys.exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") operators = 'is|not|or|and' builtin_and_math_functions = 'abs|all|any|bin|chr|cmp|complex|divmod|float|bool|hex|int|len|long|max|min|oct|ord|pow|range|reversed|round|sorted|str|sum|type|unichr|unicode|log|log10|exp|sqrt|ceil|floor' string_and_list_methods = [name for name in dir('') + dir([]) if not name.startswith('_')] whitelist = r"^([c0-9\+\-\*\/\(\)\.\'\"><=,:! ]|%s|%s|%s)*$" % (operators, builtin_and_math_functions, '|'.join(string_and_list_methods)) if not re.compile(whitelist).match(expr): - exit("Invalid expression") -if avoid_scientific_notation == "yes": + sys.exit("Invalid expression") +if avoid_scientific_notation: expr = "format_float_positional(%s)" % expr # Prepare the column variable names and wrappers for column data types @@ -77,7 +80,7 @@ col_name = "c%d" % col cols.append(col_name) col_type = in_column_types[col - 1].strip() - if round_result == 'no' and col_type == 'int': + if not round_result and col_type == 'int': col_type = 'float' type_cast = "%s(%s)" % (col_type, col_name) type_casts.append(type_cast) @@ -118,7 +121,7 @@ %s %s new_val = %s - if round_result == "yes": + if round_result: new_val = int(round(new_val)) new_line = line + '\\t' + str(new_val) + "\\n" out.write(new_line) @@ -138,9 +141,9 @@ out.close() if str(e).startswith('invalid syntax'): valid_expr = False - exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) + sys.exit('Expression "%s" likely invalid. See tool tips, syntax and examples.' % expr) else: - exit(str(e)) + sys.exit(str(e)) if valid_expr: out.close()