comparison g_chart_parser.py @ 1:fb617586f4b2 draft

planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author stevecassidy
date Mon, 05 Dec 2016 05:22:05 -0500
parents e991d4e60c17
children a47980ef2b96
comparison
equal deleted inserted replaced
0:e991d4e60c17 1:fb617586f4b2
11 args = parser.parse_args() 11 args = parser.parse_args()
12 return args 12 return args
13 13
14 14
15 def chart_parse(in_file, grammar_file, out_file): 15 def chart_parse(in_file, grammar_file, out_file):
16 text = unicode(open(in_file, 'r').read(), errors='ignore') 16 with open(in_file, 'r') as fd:
17 output = open(out_file, 'w') 17 text = fd.read()
18 grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore') 18
19 with open(grammar_file, 'r') as fd:
20 grammar_string = fd.read()
21
19 try: 22 try:
20 grammar = nltk.parse_cfg(grammar_string) 23 grammar = nltk.CFG.fromstring(grammar_string)
21 parser = nltk.ChartParser(grammar) 24 parser = nltk.ChartParser(grammar)
22 sentences = nltk.sent_tokenize(text) 25 sentences = nltk.sent_tokenize(text)
23 for sentence in sentences: 26 with open(out_file, 'w') as output:
24 words = nltk.word_tokenize(sentence) 27 for sentence in sentences:
25 tree = parser.parse(words) 28 words = nltk.word_tokenize(sentence)
26 output.write(tree.pprint()) 29 trees = parser.parse(words)
27 output.write('\n') 30 for t in trees:
28 except Exception, e: 31 output.write(t.pformat())
29 message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e) 32 output.write('\n')
33
34 except Exception as e:
35 message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e) + "\n"
30 sys.stderr.write(message) 36 sys.stderr.write(message)
31 sys.exit() 37 sys.exit()
32 output.close() 38 output.close()
33 39
34 if __name__ == '__main__': 40 if __name__ == '__main__':
35 args = arguments() 41 args = arguments()
36 chart_parse(args.input, args.grammar, args.output) 42 chart_parse(args.input, args.grammar, args.output)
37
38