comparison g_frequency.py @ 1:fb617586f4b2 draft

planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author stevecassidy
date Mon, 05 Dec 2016 05:22:05 -0500
parents e991d4e60c17
children a47980ef2b96
comparison
equal deleted inserted replaced
0:e991d4e60c17 1:fb617586f4b2
12 12
13 def frequency(in_file, out_file): 13 def frequency(in_file, out_file):
14 """Input: a text file 14 """Input: a text file
15 Output: a table of word frequency with three columns for Word, Count and Percent frequency 15 Output: a table of word frequency with three columns for Word, Count and Percent frequency
16 """ 16 """
17 text = unicode(open(in_file, 'r').read(), errors='ignore') 17 with open(in_file, 'r') as fd:
18 text = fd.read()
19
18 words = nltk.word_tokenize(text) 20 words = nltk.word_tokenize(text)
19 frequency = FreqDist(words) 21 frequency = FreqDist(words)
20 total = float(frequency.N()) 22 total = float(frequency.N())
21 output = open(out_file, 'w') 23
22 output.write("Word\tCount\tPercent\n") 24 with open(out_file, 'w') as output:
23 for pair in frequency.items(): 25 output.write("Word\tCount\tPercent\n")
24 output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total)) 26 for pair in frequency.items():
25 output.close() 27 output.write("{pair[0]}\t{pair[1]}\t{pc:.2f}\n".format(pair=pair, pc=100*pair[1]/total))
26 28
27 29
28 if __name__ == '__main__': 30 if __name__ == '__main__':
29 args = arguments() 31 args = arguments()
30 frequency(args.input, args.output) 32 frequency(args.input, args.output)