Mercurial > repos > devteam > column_maker
comparison column_maker.py @ 6:0aeda7a81b46 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit a993d43d9d1702a6cf584683cf72527a3f999236"
author | devteam |
---|---|
date | Wed, 30 Dec 2020 00:49:52 +0000 |
parents | c6fdc1118036 |
children | e7c273e8d4d6 |
comparison
equal
deleted
inserted
replaced
5:c6fdc1118036 | 6:0aeda7a81b46 |
---|---|
3 This tool takes a tab-delimited textfile as input and creates another column in | 3 This tool takes a tab-delimited textfile as input and creates another column in |
4 the file which is the result of a computation performed on every row in the | 4 the file which is the result of a computation performed on every row in the |
5 original file. The tool will skip over invalid lines within the file, | 5 original file. The tool will skip over invalid lines within the file, |
6 informing the user about the number of lines skipped. | 6 informing the user about the number of lines skipped. |
7 """ | 7 """ |
8 from __future__ import print_function | |
9 | 8 |
9 import argparse | |
10 import json | |
10 import re | 11 import re |
11 import sys | |
12 | 12 |
13 assert sys.version_info[:2] >= (2, 4) | 13 parser = argparse.ArgumentParser() |
14 parser.add_argument('input', type=argparse.FileType('r'), help="input file") | |
15 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") | |
16 parser.add_argument('cond', nargs='?', type=str, help="expression") | |
17 parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'], | |
18 help="round result") | |
19 parser.add_argument('columns', nargs='?', type=int, help="number of columns") | |
20 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types") | |
21 parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'], | |
22 help="avoid scientific notation") | |
23 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'), | |
24 help="overwrite parsed arguments from json file") | |
25 args = parser.parse_args() | |
14 | 26 |
15 inp_file = sys.argv[1] | 27 argparse_dict = vars(args) |
16 out_file = sys.argv[2] | 28 if args.load_json: |
17 expr = sys.argv[3] | 29 json_dict = json.load(args.load_json) |
18 round_result = sys.argv[4] | 30 argparse_dict.update(json_dict) |
31 | |
32 fh = argparse_dict['input'] | |
33 out = argparse_dict['output'] | |
34 expr = argparse_dict['cond'] | |
35 round_result = argparse_dict['round'] | |
19 try: | 36 try: |
20 in_columns = int(sys.argv[5]) | 37 in_columns = int(argparse_dict['columns']) |
21 except Exception: | 38 except Exception: |
22 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 39 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
23 if in_columns < 2: | 40 if in_columns < 2: |
24 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. | 41 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. |
25 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 42 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
26 try: | 43 try: |
27 in_column_types = sys.argv[6].split(',') | 44 in_column_types = argparse_dict['column_types'].split(',') |
28 except Exception: | 45 except Exception: |
29 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 46 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
30 if len(in_column_types) != in_columns: | 47 if len(in_column_types) != in_columns: |
31 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") | 48 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") |
32 avoid_scientific_notation = sys.argv[7] | 49 avoid_scientific_notation = argparse_dict['avoid_scientific_notation'] |
33 | 50 |
34 # Unescape if input has been escaped | 51 # Unescape if input has been escaped |
35 mapped_str = { | 52 mapped_str = { |
36 '__lt__': '<', | 53 '__lt__': '<', |
37 '__le__': '<=', | 54 '__le__': '<=', |
72 skipped_lines = 0 | 89 skipped_lines = 0 |
73 first_invalid_line = 0 | 90 first_invalid_line = 0 |
74 invalid_line = None | 91 invalid_line = None |
75 lines_kept = 0 | 92 lines_kept = 0 |
76 total_lines = 0 | 93 total_lines = 0 |
77 out = open(out_file, 'wt') | |
78 | 94 |
79 # Read input file, skipping invalid lines, and perform computation that will result in a new column | 95 # Read input file, skipping invalid lines, and perform computation that will result in a new column |
80 code = ''' | 96 code = ''' |
81 # import here since flake8 complains otherwise | 97 # import here since flake8 complains otherwise |
82 from math import ( | 98 from math import ( |
87 log10, | 103 log10, |
88 sqrt | 104 sqrt |
89 ) | 105 ) |
90 from numpy import format_float_positional | 106 from numpy import format_float_positional |
91 | 107 |
92 fh = open(inp_file) | |
93 for i, line in enumerate(fh): | 108 for i, line in enumerate(fh): |
94 total_lines += 1 | 109 total_lines += 1 |
95 line = line.rstrip('\\r\\n') | 110 line = line.rstrip('\\r\\n') |
96 if not line or line.startswith('#'): | 111 if not line or line.startswith('#'): |
97 skipped_lines += 1 | 112 skipped_lines += 1 |