Mercurial > repos > stevecassidy > nltktools
comparison g_read_sents.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author | stevecassidy |
---|---|
date | Mon, 05 Dec 2016 05:22:05 -0500 |
parents | e991d4e60c17 |
children | a47980ef2b96 |
comparison
equal
deleted
inserted
replaced
0:e991d4e60c17 | 1:fb617586f4b2 |
---|---|
11 | 11 |
12 args = the_parser.parse_args() | 12 args = the_parser.parse_args() |
13 return args | 13 return args |
14 | 14 |
15 def print_out(outp, text, sentences): | 15 def print_out(outp, text, sentences): |
16 o = open(outp, 'w') | 16 with open(outp, 'w') as output: |
17 curr = 0 | 17 curr = 0 |
18 for sent in sentences: | 18 for sent in sentences: |
19 times = count_occurences(sent, sent[-1]) | 19 times = count_occurences(sent, sent[-1]) |
20 curr = text.find(sent[0], curr) | 20 curr = text.find(sent[0], curr) |
21 end = find_nth(text, sent[-1], times, curr) + len(sent[-1]) | 21 end = find_nth(text, sent[-1], times, curr) + len(sent[-1]) |
22 o.write(text[curr:end] + '\n') | 22 output.write(text[curr:end] + '\n') |
23 curr = end | 23 curr = end |
24 o.close() | |
25 | 24 |
26 def find_nth(string, sub, n, offset): | 25 def find_nth(string, sub, n, offset): |
27 start = string.find(sub, offset) | 26 start = string.find(sub, offset) |
28 while start >= 0 and n > 1: | 27 while start >= 0 and n > 1: |
29 start = string.find(sub, start + len(sub)) | 28 start = string.find(sub, start + len(sub)) |
36 if string in item: | 35 if string in item: |
37 count += 1 | 36 count += 1 |
38 return count | 37 return count |
39 | 38 |
40 def read_sents(inp, outp): | 39 def read_sents(inp, outp): |
41 i = open(inp, 'r').read() | 40 with open(inp, 'r') as fd: |
| | 41 i = fd.read() |
42 corpus = PlaintextCorpusReader(os.path.dirname(inp), os.path.basename(inp)) | 42 corpus = PlaintextCorpusReader(os.path.dirname(inp), os.path.basename(inp)) |
43 sents = corpus.sents() | 43 sents = corpus.sents() |
44 print_out(outp, i, sents) | 44 print_out(outp, i, sents) |
45 | 45 |
46 if __name__ == '__main__': | 46 if __name__ == '__main__': |