Mercurial > repos > stevecassidy > nltktools
comparison g_chart_parser.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author | stevecassidy |
---|---|
date | Mon, 05 Dec 2016 05:22:05 -0500 |
parents | e991d4e60c17 |
children | a47980ef2b96 |
comparison
equal
deleted
inserted
replaced
0:e991d4e60c17 | 1:fb617586f4b2 |
---|---|
11 args = parser.parse_args() | 11 args = parser.parse_args() |
12 return args | 12 return args |
13 | 13 |
14 | 14 |
15 def chart_parse(in_file, grammar_file, out_file): | 15 def chart_parse(in_file, grammar_file, out_file): |
16 text = unicode(open(in_file, 'r').read(), errors='ignore') | 16 with open(in_file, 'r') as fd: |
17 output = open(out_file, 'w') | 17 text = fd.read() |
18 grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore') | 18 |
| | 19 with open(grammar_file, 'r') as fd: |
| | 20 grammar_string = fd.read() |
| | 21 |
19 try: | 22 try: |
20 grammar = nltk.parse_cfg(grammar_string) | 23 grammar = nltk.CFG.fromstring(grammar_string) |
21 parser = nltk.ChartParser(grammar) | 24 parser = nltk.ChartParser(grammar) |
22 sentences = nltk.sent_tokenize(text) | 25 sentences = nltk.sent_tokenize(text) |
23 for sentence in sentences: | 26 with open(out_file, 'w') as output: |
24 words = nltk.word_tokenize(sentence) | 27 for sentence in sentences: |
25 tree = parser.parse(words) | 28 words = nltk.word_tokenize(sentence) |
26 output.write(tree.pprint()) | 29 trees = parser.parse(words) |
27 output.write('\n') | 30 for t in trees: |
28 except Exception, e: | 31 output.write(t.pformat()) |
29 message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e) | 32 output.write('\n') |
| | 33 |
| | 34 except Exception as e: |
| | 35 message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e) + "\n" |
30 sys.stderr.write(message) | 36 sys.stderr.write(message) |
31 sys.exit() | 37 sys.exit() |
32 output.close() | 38 output.close() |
33 | 39 |
34 if __name__ == '__main__': | 40 if __name__ == '__main__': |
35 args = arguments() | 41 args = arguments() |
36 chart_parse(args.input, args.grammar, args.output) | 42 chart_parse(args.input, args.grammar, args.output) |
37 | |
38 |