annotate replace_delimiter/replace_delimiter.py @ 0:101608e1f388 draft default tip

Uploaded
author saketkc
date Tue, 07 Oct 2014 18:59:45 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
101608e1f388 Uploaded
saketkc
parents:
diff changeset
1 #!/usr/bin/env python
101608e1f388 Uploaded
saketkc
parents:
diff changeset
2 #By, Guruprasad Ananda.
101608e1f388 Uploaded
saketkc
parents:
diff changeset
3
101608e1f388 Uploaded
saketkc
parents:
diff changeset
4 import sys, re
101608e1f388 Uploaded
saketkc
parents:
diff changeset
5
101608e1f388 Uploaded
saketkc
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* to standard error unchanged (no newline is appended) and
    then raises SystemExit with status 1, so that callers and wrapping
    shells can tell the run failed.

    Args:
        msg: Text to write to standard error.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() reports status 0 (success); an error path must
    # return a non-zero status.
    sys.exit(1)
101608e1f388 Uploaded
saketkc
parents:
diff changeset
9
101608e1f388 Uploaded
saketkc
parents:
diff changeset
def main():
    """Replace every run of one delimiter with a single other delimiter.

    Reads four command-line arguments from ``sys.argv``:

        infile from_char to_char outfile

    ``from_char`` and ``to_char`` are delimiter codes:

        T  tab          s  whitespace (written as one space)
        Dt dot          C  comma
        D  dash         U  underscore
        P  pipe         Co colon
        Sc semicolon

    Each input line is stripped of surrounding whitespace, every run of one
    or more ``from_char`` delimiters is replaced by a single ``to_char``
    delimiter, and the result is written to ``outfile`` followed by a
    newline.

    Calls stop_err() when the argument count is wrong, when a delimiter
    code is unknown, or when either file cannot be opened.
    """
    if len(sys.argv) != 5:
        stop_err("usage: convert_characters infile from_char to_char outfile")

    in_path, from_char, to_char, out_path = sys.argv[1:5]

    # Delimiter code -> the literal character it stands for.  Regex escaping
    # is applied only when building the search pattern below, so the
    # replacement text is never a regex fragment such as '\.' or '\|'.
    literal_chars = {
        'T': '\t',
        's': ' ',
        'Dt': '.',
        'C': ',',
        'D': '-',
        'U': '_',
        'P': '|',
        'Co': ':',
        'Sc': ';',
    }

    for code in (from_char, to_char):
        if code not in literal_chars:
            stop_err("Unknown delimiter code %r; expected one of: %s"
                     % (code, ", ".join(sorted(literal_chars))))

    if from_char == 's':
        # As a source delimiter 's' matches any whitespace, not only a space.
        pattern = r'\s+'
    else:
        # '+' collapses a run of consecutive delimiters into one match.
        pattern = re.escape(literal_chars[from_char]) + '+'
    delimiter_run = re.compile(pattern)

    # None of the literal characters contains a backslash, so the string is
    # safe to use directly as an re.sub replacement template.
    replacement = literal_chars[to_char]

    try:
        fin = open(in_path, 'r')
    except (IOError, OSError):
        stop_err("Input file cannot be opened for reading.")

    try:
        fout = open(out_path, 'w')
    except (IOError, OSError):
        fin.close()
        stop_err("Output file cannot be opened for writing.")

    # Substitution on a plain string cannot fail, so there is no per-line
    # "skip" path; genuine I/O errors propagate instead of being hidden.
    with fin, fout:
        for line in fin:
            fout.write("%s\n" % delimiter_run.sub(replacement, line.strip()))
101608e1f388 Uploaded
saketkc
parents:
diff changeset
53
101608e1f388 Uploaded
saketkc
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()