Mercurial > repos > galaxyp > regex_find_replace
annotate regex.py @ 5:96eb105e71d8 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 3eaa2a2bdaeedf1b0771b1b764a550106dcbc186
| author | galaxyp |
|---|---|
| date | Thu, 02 Feb 2023 08:21:21 +0000 |
| parents | d72a404cef12 |
| children |
| rev | line source |
|---|---|
|
4
d72a404cef12
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit a9b01eafeefb50c416fbfe342b15298fe9321679
galaxyp
parents:
2
diff
changeset
|
1 import re |
|
2
6dcfb96bc8b0
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents:
0
diff
changeset
|
2 from optparse import OptionParser |
|
6dcfb96bc8b0
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents:
0
diff
changeset
|
3 |
|
0
002f95cf9d6e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
4 |
|
002f95cf9d6e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
# Placeholder tokens mapped back to the literal characters they stand for.
# Presumably these tokens are produced by Galaxy's parameter sanitiser before
# the values reach the command line -- confirm against the tool wrapper.
_MAPPED_CHARS = {'\'': '__sq__', '\\': '__backslash__'}


def _unescape(text):
    """Return *text* with every placeholder token replaced by its character."""
    for char, token in _MAPPED_CHARS.items():
        text = text.replace(token, char)
    return text


def _substitute(line, substitutions, column):
    """Apply each (compiled_regex, replacement) pair to *line*.

    With ``column`` set to ``None`` the whole line (including its line
    terminator) is searched.  Otherwise only the zero-based tab-separated
    cell ``column`` is searched; lines with too few cells are left untouched.
    """
    for regex, replacement in substitutions:
        if column is None:
            line = regex.sub(replacement, line)
            continue
        # Re-split for every pair so that a replacement which inserts a tab
        # shifts the columns seen by later pairs, as the tool always did.
        cells = line.split("\t")
        if len(cells) > column:
            cells[column] = regex.sub(replacement, cells[column])
            line = "\t".join(cells)
    return line


def main():
    """Run regex find/replace over a text file, optionally on a single column.

    Command-line options:
      --input               path of the file to read
      --output              path of the file to write
      --input_display_name  literal text substituted for ``#{input_name}``
                            inside each replacement (optional)
      --pattern             regular expression to search for (repeatable)
      --replacement         replacement template (repeatable); paired with
                            the patterns by position, extra entries on either
                            side are ignored
      --column              1-based tab-separated column to restrict the
                            substitution to (optional)

    Patterns are compiled before any file is opened, so an invalid regular
    expression raises ``re.error`` without touching the output file.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    column = None
    if options.column is not None:
        # Galaxy tabular columns are 1-based, Python lists are zero-based.
        column = int(options.column) - 1
        if column < 0:
            # A negative index would silently address columns from the end.
            parser.error("--column must be 1 or greater")

    display_name = options.input_display_name
    if display_name is not None:
        # The name is inserted into a regex replacement template; double the
        # backslashes so it is emitted literally instead of being parsed as
        # an escape sequence or group reference.
        display_name = display_name.replace("\\", "\\\\")

    # optparse leaves "append" options as None when they were never given.
    patterns = options.patterns or []
    replacements = options.replacements or []

    # Decode and compile once up front instead of once per input line.
    substitutions = []
    for pattern, replacement in zip(patterns, replacements):
        replacement = _unescape(replacement)
        if display_name is not None:
            replacement = replacement.replace("#{input_name}", display_name)
        substitutions.append((re.compile(_unescape(pattern)), replacement))

    with open(options.input, 'r') as source, open(options.output, 'w') as sink:
        for line in source:
            sink.write(_substitute(line, substitutions, column))
|
6dcfb96bc8b0
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 8871a847daed1f996c0a2069c3e876685bf8d220"
galaxyp
parents:
0
diff
changeset
|
44 |
|
0
002f95cf9d6e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
45 |
|
002f95cf9d6e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff
changeset
|
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
