comparison column_maker.py @ 8:227e82286a0e draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit c623e4b9f6dd6ff4b88d9386f3b0a0cb497e0a0e"
author devteam
date Wed, 24 Feb 2021 05:19:12 +0000
parents e7c273e8d4d6
children 33b81f9ea109
comparison
equal deleted inserted replaced
7:e7c273e8d4d6 8:227e82286a0e
8 8
9 import argparse 9 import argparse
10 import json 10 import json
11 import re 11 import re
12 import sys 12 import sys
13 # functions that may be used in the compute expression
14 from math import ( # noqa: F401
15 ceil,
16 exp,
17 floor,
18 log,
19 log10,
20 sqrt
21 )
22
23 from numpy import format_float_positional # noqa: F401
13 24
14 parser = argparse.ArgumentParser() 25 parser = argparse.ArgumentParser()
15 parser.add_argument('input', type=argparse.FileType('r'), help="input file") 26 parser.add_argument('input', type=argparse.FileType('r'), help="input file")
16 parser.add_argument('output', type=argparse.FileType('wt'), help="output file") 27 parser.add_argument('output', type=argparse.FileType('wt'), help="output file")
17 parser.add_argument('cond', nargs='?', type=str, help="expression") 28 parser.add_argument('cond', nargs='?', type=str, help="expression")
47 header_line, argparse_dict['header_new_column_name'] 58 header_line, argparse_dict['header_new_column_name']
48 ) 59 )
49 ) 60 )
50 try: 61 try:
51 in_columns = int(argparse_dict['columns']) 62 in_columns = int(argparse_dict['columns'])
52 if in_columns < 2: 63 if in_columns < 1:
53 # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method. 64 # To be considered tabular, data must have at least one column.
54 raise ValueError 65 raise ValueError
55 except Exception: 66 except Exception:
56 if not fh.readline(): 67 if not fh.readline():
57 # empty file content is ok and should produce empty output 68 # empty file content is ok and should produce empty output
58 out.close() 69 out.close()
86 type_casts.append(type_cast) 97 type_casts.append(type_cast)
87 98
88 col_str = ', '.join(cols) # 'c1, c2, c3, c4' 99 col_str = ', '.join(cols) # 'c1, c2, c3, c4'
89 type_cast_str = ', '.join(type_casts) # 'str(c1), int(c2), int(c3), str(c4)' 100 type_cast_str = ', '.join(type_casts) # 'str(c1), int(c2), int(c3), str(c4)'
90 assign = "%s = line.split('\\t')" % col_str 101 assign = "%s = line.split('\\t')" % col_str
102 if len(cols) == 1:
103 # Single column, unpacking by assignment won't work
104 assign += '[0]'
91 wrap = "%s = %s" % (col_str, type_cast_str) 105 wrap = "%s = %s" % (col_str, type_cast_str)
92 skipped_lines = 0 106 skipped_lines = 0
93 first_invalid_line = 0 107 first_invalid_line = 0
94 invalid_line = None 108 invalid_line = None
95 lines_kept = 0 109 lines_kept = 0
96 total_lines = 0 110 total_lines = 0
97 111
98 # Read input file, skipping invalid lines, and perform computation that will result in a new column 112 # Read input file, skipping invalid lines, and perform computation that will result in a new column
99 code = ''' 113 code = '''
100 # import here since flake8 complains otherwise
101 from math import (
102 ceil,
103 exp,
104 floor,
105 log,
106 log10,
107 sqrt
108 )
109 from numpy import format_float_positional
110
111 for i, line in enumerate(fh): 114 for i, line in enumerate(fh):
112 total_lines += 1 115 total_lines += 1
113 line = line.rstrip('\\r\\n') 116 line = line.rstrip('\\r\\n')
114 if not line or line.startswith('#'): 117 if not line or line.startswith('#'):
115 skipped_lines += 1 118 skipped_lines += 1