comparison column_maker.py @ 6:0aeda7a81b46 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit a993d43d9d1702a6cf584683cf72527a3f999236"
author devteam
date Wed, 30 Dec 2020 00:49:52 +0000
parents c6fdc1118036
children e7c273e8d4d6
comparison
equal deleted inserted replaced
5:c6fdc1118036 6:0aeda7a81b46
3 This tool takes a tab-delimited textfile as input and creates another column in 3 This tool takes a tab-delimited textfile as input and creates another column in
4 the file which is the result of a computation performed on every row in the 4 the file which is the result of a computation performed on every row in the
5 original file. The tool will skip over invalid lines within the file, 5 original file. The tool will skip over invalid lines within the file,
6 informing the user about the number of lines skipped. 6 informing the user about the number of lines skipped.
7 """ 7 """
8 from __future__ import print_function
9 8
9 import argparse
10 import json
10 import re 11 import re
11 import sys
12 12
13 assert sys.version_info[:2] >= (2, 4) 13 parser = argparse.ArgumentParser()
14 parser.add_argument('input', type=argparse.FileType('r'), help="input file")
15 parser.add_argument('output', type=argparse.FileType('wt'), help="output file")
16 parser.add_argument('cond', nargs='?', type=str, help="expression")
17 parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'],
18 help="round result")
19 parser.add_argument('columns', nargs='?', type=int, help="number of columns")
20 parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types")
21 parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'],
22 help="avoid scientific notation")
23 parser.add_argument('--load_json', default=None, type=argparse.FileType('r'),
24 help="overwrite parsed arguments from json file")
25 args = parser.parse_args()
14 26
15 inp_file = sys.argv[1] 27 argparse_dict = vars(args)
16 out_file = sys.argv[2] 28 if args.load_json:
17 expr = sys.argv[3] 29 json_dict = json.load(args.load_json)
18 round_result = sys.argv[4] 30 argparse_dict.update(json_dict)
31
32 fh = argparse_dict['input']
33 out = argparse_dict['output']
34 expr = argparse_dict['cond']
35 round_result = argparse_dict['round']
19 try: 36 try:
20 in_columns = int(sys.argv[5]) 37 in_columns = int(argparse_dict['columns'])
21 except Exception: 38 except Exception:
22 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") 39 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
23 if in_columns < 2: 40 if in_columns < 2:
24 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. 41 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
25 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") 42 exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
26 try: 43 try:
27 in_column_types = sys.argv[6].split(',') 44 in_column_types = argparse_dict['column_types'].split(',')
28 except Exception: 45 except Exception:
29 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") 46 exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
30 if len(in_column_types) != in_columns: 47 if len(in_column_types) != in_columns:
31 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.") 48 exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it. This tool can only be used with tab-delimited data.")
32 avoid_scientific_notation = sys.argv[7] 49 avoid_scientific_notation = argparse_dict['avoid_scientific_notation']
33 50
34 # Unescape if input has been escaped 51 # Unescape if input has been escaped
35 mapped_str = { 52 mapped_str = {
36 '__lt__': '<', 53 '__lt__': '<',
37 '__le__': '<=', 54 '__le__': '<=',
72 skipped_lines = 0 89 skipped_lines = 0
73 first_invalid_line = 0 90 first_invalid_line = 0
74 invalid_line = None 91 invalid_line = None
75 lines_kept = 0 92 lines_kept = 0
76 total_lines = 0 93 total_lines = 0
77 out = open(out_file, 'wt')
78 94
79 # Read input file, skipping invalid lines, and perform computation that will result in a new column 95 # Read input file, skipping invalid lines, and perform computation that will result in a new column
80 code = ''' 96 code = '''
81 # import here since flake8 complains otherwise 97 # import here since flake8 complains otherwise
82 from math import ( 98 from math import (
87 log10, 103 log10,
88 sqrt 104 sqrt
89 ) 105 )
90 from numpy import format_float_positional 106 from numpy import format_float_positional
91 107
92 fh = open(inp_file)
93 for i, line in enumerate(fh): 108 for i, line in enumerate(fh):
94 total_lines += 1 109 total_lines += 1
95 line = line.rstrip('\\r\\n') 110 line = line.rstrip('\\r\\n')
96 if not line or line.startswith('#'): 111 if not line or line.startswith('#'):
97 skipped_lines += 1 112 skipped_lines += 1