Mercurial > repos > stevecassidy > nltktools

diff g_frequency.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author: stevecassidy
date: Mon, 05 Dec 2016 05:22:05 -0500
parents: e991d4e60c17
children: a47980ef2b96
--- a/g_frequency.py	Wed Oct 12 22:17:53 2016 -0400
+++ b/g_frequency.py	Mon Dec 05 05:22:05 2016 -0500
@@ -14,15 +14,17 @@
     """Input: a text file
     Output: a table of word frequency with three columns for Word, Count and Percent frequency
     """
-    text = unicode(open(in_file, 'r').read(), errors='ignore')
+    with open(in_file, 'r') as fd:
+        text = fd.read()
+
     words = nltk.word_tokenize(text)
     frequency = FreqDist(words)
     total = float(frequency.N())
-    output = open(out_file, 'w')
-    output.write("Word\tCount\tPercent\n")
-    for pair in frequency.items():
-        output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total))
-    output.close()
+    
+    with open(out_file, 'w') as output:
+        output.write("Word\tCount\tPercent\n")
+        for pair in frequency.items():
+            output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total))
 
 
 if __name__ == '__main__':