annotate regex.py @ 0:002f95cf9d6e draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
author galaxyp
date Wed, 18 Jan 2017 17:45:06 -0500
parents
children 6dcfb96bc8b0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
1 import sys
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
2 import os
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
3 import re
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
4 import string
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
5 import commands
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
6 from optparse import OptionParser
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
7 from tempfile import NamedTemporaryFile
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
8
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
# Escape tokens that may appear in --pattern / --replacement values, keyed by
# the literal character each one stands for.
# NOTE(review): presumably these tokens come from Galaxy's parameter
# sanitization -- confirm against the tool wrapper.
_MAPPED_CHARS = {'\'': '__sq__', '\\': '__backslash__'}


def _unescape(text):
    """Return *text* with every escape token replaced by its literal character."""
    for char, token in _MAPPED_CHARS.items():
        text = text.replace(token, char)
    return text


def _substitute(line, rules, column):
    """Apply each (compiled_regex, replacement) rule to *line*, in order.

    When *column* is None the whole line is rewritten. Otherwise only the
    zero-based, tab-separated cell at index *column* is rewritten; a line
    with too few cells is returned unchanged by that rule.
    """
    for regex, replacement in rules:
        if column is None:
            line = regex.sub(replacement, line)
            continue
        # Re-split for every rule: an earlier replacement may have inserted
        # or removed tabs. The last cell keeps the line terminator.
        cells = line.split("\t")
        if len(cells) > column:
            cells[column] = regex.sub(replacement, cells[column])
            line = "\t".join(cells)
    return line


def main(argv=None):
    """Copy --input to --output, applying regex replacements to each line.

    Options:
        --input, --output     paths of the file to read and the file to write.
        --input_display_name  text substituted for the literal "#{input_name}"
                              in each replacement; if omitted the placeholder
                              is left untouched.
        --pattern             regex to search for (repeatable).
        --replacement         replacement for the pattern at the same
                              position (repeatable); pairs are matched with
                              zip(), so surplus values on either side are
                              ignored.
        --column              1-based tab-separated column to restrict the
                              replacement to; omitted means the whole line.

    Args:
        argv: argument list to parse; None means sys.argv[1:].

    Exits with a usage error (via OptionParser.error) when --input or
    --output is missing or when --column is smaller than 1.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args(argv)

    if options.input is None or options.output is None:
        parser.error("--input and --output are required")

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python arrays are zero-based
        column = int(options.column) - 1
        if column < 0:
            # A negative index would silently address cells from the end.
            parser.error("--column must be 1 or greater")

    # The rules do not depend on the input data, so unescape and compile them
    # once up front instead of once per line. optparse leaves "append"
    # options as None when they were never given.
    rules = []
    for pattern, replacement in zip(options.patterns or [],
                                    options.replacements or []):
        replacement = _unescape(replacement)
        if options.input_display_name is not None:
            replacement = replacement.replace("#{input_name}",
                                              options.input_display_name)
        rules.append((re.compile(_unescape(pattern)), replacement))

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as sink:
            for line in source:
                sink.write(_substitute(line, rules, column))
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
48
002f95cf9d6e planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 568a615b191482c54ecb31399ba27f78d6c71510
galaxyp
parents:
diff changeset
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()