changeset 2:1dfc6d345abd draft

Uploaded
author julius
date Wed, 01 Aug 2012 07:36:48 -0400
parents 847d5c804ec4
children dc24da0a3bd5
files convert_characters.py
diffstat 1 files changed, 42 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_characters.py	Wed Aug 01 07:36:48 2012 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+#By, Guruprasad Ananda.
+
+from galaxy import eggs
+import sys, re
+
+def stop_err(msg):
+    """Abort: msg is printed to stderr and the exit status is 1, not the old 0."""
+    sys.exit(msg)    # a string argument is written to stderr by the interpreter
+    
+def main():
+    """Copy infile to outfile, replacing each run of from_char with one tab.
+    argv: infile, from_char code (T, s, Dt, C, D, U or P), outfile; lines are stripped first."""
+    if len(sys.argv) != 4:
+        stop_err("usage: convert_characters infile from_char outfile")
+    char_dict = {'T':'\t','s':r'\s','Dt':r'\.','C':',','D':'-','U':'_','P':r'\|'}    # raw strings: real regex escapes
+    if sys.argv[2] not in char_dict:    # was an unhandled KeyError traceback
+        stop_err("Unknown from_char '%s'; use T, s, Dt, C, D, U or P." % sys.argv[2])
+    from_re = re.compile(char_dict[sys.argv[2]] + '+')    # 1 or more occurrences
+    try:
+        fin = open(sys.argv[1],'r')
+    except (IOError, OSError):
+        stop_err("Input file cannot be opened for reading.")
+    try:
+        fout = open(sys.argv[3],'w')
+    except (IOError, OSError):
+        stop_err("Output file cannot be opened for writing.")
+    skipped = 0
+    try:
+        for line in fin:
+            try:
+                fout.write("%s\n" % from_re.sub('\t', line.strip()))
+            except Exception:    # best effort, but let Ctrl-C / sys.exit through
+                skipped += 1
+    finally:    # close both handles so the output is flushed even on failure
+        fin.close()
+        fout.close()
+    if skipped:
+        print("Skipped %d lines as invalid." % skipped)
+    
+if __name__ == "__main__":    # run the conversion only when executed as a script
+    main()
\ No newline at end of file