Mercurial > repos > stevecassidy > nltktools
diff g_frequency.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author | stevecassidy |
---|---|
date | Mon, 05 Dec 2016 05:22:05 -0500 |
parents | e991d4e60c17 |
children | a47980ef2b96 |
line wrap: on
line diff
--- a/g_frequency.py Wed Oct 12 22:17:53 2016 -0400
+++ b/g_frequency.py Mon Dec 05 05:22:05 2016 -0500
@@ -14,15 +14,17 @@
     """Input: a text file
     Output: a table of word frequency with three columns for Word, Count and Percent frequency
     """
-    text = unicode(open(in_file, 'r').read(), errors='ignore')
+    with open(in_file, 'r') as fd:
+        text = fd.read()
+
     words = nltk.word_tokenize(text)
     frequency = FreqDist(words)
     total = float(frequency.N())
-    output = open(out_file, 'w')
-    output.write("Word\tCount\tPercent\n")
-    for pair in frequency.items():
-        output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total))
-    output.close()
+
+    with open(out_file, 'w') as output:
+        output.write("Word\tCount\tPercent\n")
+        for pair in frequency.items():
+            output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total))
 
 
 if __name__ == '__main__':