annotate tabpad.py @ 2:a035215e81f6 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 2948f114101aeefa951f47df8ee3b25656d9680e
author iuc
date Thu, 09 Mar 2023 21:00:23 +0000
parents 0c2e94e8db77
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
2
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
3 import argparse
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
4 import re
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
5
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
6
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
def padfile(infile, outfile, fieldcnt=None):
    """Copy a tab-separated text file, right-padding rows with TABs.

    Lines whose first non-blank character is ``#`` are treated as comments.
    Within a run of consecutive comment lines, every line but the last is
    copied verbatim; the last one is padded like a data row.  A run of
    comment lines at the very end of the file (or a file made only of
    comment lines) is written the same way instead of being dropped.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (created or truncated).
        fieldcnt: number of fields every padded line should have.  When
            ``None`` (or 0), padded lines get no extra TABs and are only
            normalised to end with a single newline.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def pad_line(out, txtline):
        # Write one line without its line ending, followed by as many TABs
        # as are needed to reach the requested field count, then a newline.
        line = txtline.rstrip('\r\n')
        nfields = line.count('\t') + 1
        padding = tabs[nfields:] if tabs else ''
        out.write('%s%s\n' % (line, padding))

    def flush_comments(out, pending):
        # Earlier comment lines go out untouched; only the last line of the
        # run is padded.
        out.writelines(pending[:-1])
        pad_line(out, pending[-1])

    # Both handles are managed by the ``with`` statement so the output file
    # is closed even if reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        commentlines = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                commentlines.append(txtline)
                continue
            if commentlines:
                flush_comments(out, commentlines)
                commentlines = []
            pad_line(out, txtline)
        # Comment lines not followed by any data line would otherwise be lost.
        if commentlines:
            flush_comments(out, commentlines)
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
31
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
32
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of the file is considered, including lines starting with
    ``#``.  Line endings are ignored when counting.  An empty file
    yields 0.

    Args:
        infile: path of the text file to scan.

    Returns:
        The maximum field count found, as an int.
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # N tab characters delimit N + 1 fields.
            fieldcnt = max(fieldcnt, line.rstrip('\r\n').count('\t') + 1)
    return fieldcnt
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
39
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
40
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
def tsvname(infile):
    """Return the output name for *infile*: a ``.txt`` suffix becomes ``.tsv``.

    Only a literal trailing ``.txt`` is removed before ``.tsv`` is appended;
    names that merely end in the letters ``txt`` (e.g. ``mytxt``) are kept
    whole.  Any other name simply gets ``.tsv`` appended.
    """
    suffix = '.txt'
    stem = infile[:-len(suffix)] if infile.endswith(suffix) else infile
    return stem + '.tsv'
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
43
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
44
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
def __main__(argv=None):
    """Command-line entry point: pad one or more files with TABs.

    Options:
        -i/--input   a single input file
        -o/--output  output path for the ``--input`` file; when omitted the
                     name is derived with ``tsvname``
        files        any number of additional input files, each written to
                     the name ``tsvname`` derives from it

    Each file is scanned once with ``fieldcount`` to find its widest line
    and then rewritten with ``padfile`` so padded lines reach that width.

    Args:
        argv: optional list of argument strings; ``None`` makes argparse
            read ``sys.argv[1:]`` as before.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument('-i', '--input', help='input file')
    parser.add_argument('-o', '--output', help='output file')
    parser.add_argument('files', nargs='*', help='.txt files')
    args = parser.parse_args(argv)

    if args.input:
        outfile = args.output if args.output else tsvname(args.input)
        padfile(args.input, outfile, fieldcnt=fieldcount(args.input))
    # NOTE(review): positional files are processed whether or not --input
    # was given; this nesting is reconstructed, confirm against the repo.
    for infile in args.files:
        padfile(infile, tsvname(infile), fieldcnt=fieldcount(infile))
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
64
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
65
0c2e94e8db77 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff changeset
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    __main__()