#!/usr/bin/env python
# By Guruprasad Ananda.
#
# Recovered from a whitespace-collapsed HG changeset patch:
#   User julius
#   Date 1343821008 14400
#   Node ID 1dfc6d345abd98eeb9d736fad2f51b46ae8a0626
#   Parent 847d5c804ec4aca1d549a8116dcd890af6a22696
#   Uploaded; adds convert_characters.py (Wed Aug 01 07:36:48 2012 -0400)
"""Replace runs of a chosen delimiter character in a text file with tabs.

usage: convert_characters infile from_char outfile

``from_char`` is one of the short codes listed in ``_CHAR_PATTERNS``.
"""

try:
    # The script never references ``eggs`` itself.
    # NOTE(review): presumably imported for its side effects inside Galaxy;
    # confirm. Tolerating its absence lets the script run stand-alone.
    from galaxy import eggs  # noqa: F401
except ImportError:
    eggs = None

import re
import sys

# Command-line code -> regex fragment matching a single delimiter character.
# Raw strings keep the backslashes literal instead of relying on Python
# passing unknown escape sequences through unchanged.
_CHAR_PATTERNS = {
    'T': r'\t',   # tab
    's': r'\s',   # any whitespace
    'Dt': r'\.',  # dot
    'C': ',',     # comma
    'D': '-',     # dash
    'U': '_',     # underscore
    'P': r'\|',   # pipe
}


def stop_err(msg):
    """Write *msg* to stderr and exit with a non-zero status."""
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def _convert_lines(fin, fout, pattern):
    """Copy *fin* to *fout*, turning each match of *pattern* into one tab.

    Every line is stripped of leading and trailing whitespace first.
    Returns the number of lines that could not be converted or written.
    """
    skipped = 0
    for line in fin:
        line = line.strip()
        try:
            fout.write("%s\n" % pattern.sub('\t', line))
        except Exception:
            # Best effort: count the failing line and carry on.
            skipped += 1
    return skipped


def main(argv=None):
    """Run the converter.

    argv: full argument vector ``[prog, infile, from_char, outfile]``;
        defaults to ``sys.argv`` when omitted.

    Exits through ``stop_err`` on a wrong argument count, an unknown
    ``from_char`` code, or a file that cannot be opened.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 4:
        stop_err("usage: convert_characters infile from_char outfile")

    in_path, from_char, out_path = argv[1], argv[2], argv[3]

    # Validate the code before touching any file, so that a bad code does
    # not leave behind an empty or truncated output file.
    if from_char not in _CHAR_PATTERNS:
        stop_err("Unknown from_char %r; expected one of: %s"
                 % (from_char, ", ".join(sorted(_CHAR_PATTERNS))))

    # '+' collapses a run of one or more delimiters into a single tab.
    pattern = re.compile(_CHAR_PATTERNS[from_char] + '+')

    try:
        fin = open(in_path, 'r')
    except (IOError, OSError):
        stop_err("Input file cannot be opened for reading.")

    try:
        fout = open(out_path, 'w')
    except (IOError, OSError):
        fin.close()
        stop_err("Output file cannot be opened for writing.")

    try:
        skipped = _convert_lines(fin, fout, pattern)
    finally:
        fin.close()
        fout.close()

    if skipped:
        # Parenthesised single-argument form works on Python 2 and 3.
        print("Skipped %d lines as invalid." % skipped)


if __name__ == "__main__":
    main()