Mercurial > repos > stevecassidy > nltktools
diff g_read_sents.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author | stevecassidy |
---|---|
date | Mon, 05 Dec 2016 05:22:05 -0500 |
parents | e991d4e60c17 |
children | a47980ef2b96 |
line wrap: on
line diff
--- a/g_read_sents.py	Wed Oct 12 22:17:53 2016 -0400
+++ b/g_read_sents.py	Mon Dec 05 05:22:05 2016 -0500
@@ -13,15 +13,14 @@
     return args
 
 def print_out(outp, text, sentences):
-    o = open(outp, 'w')
-    curr = 0
-    for sent in sentences:
-        times = count_occurences(sent, sent[-1])
-        curr = text.find(sent[0], curr)
-        end = find_nth(text, sent[-1], times, curr) + len(sent[-1])
-        o.write(text[curr:end] + '\n')
-        curr = end
-    o.close()
+    with open(outp, 'w') as output:
+        curr = 0
+        for sent in sentences:
+            times = count_occurences(sent, sent[-1])
+            curr = text.find(sent[0], curr)
+            end = find_nth(text, sent[-1], times, curr) + len(sent[-1])
+            output.write(text[curr:end] + '\n')
+            curr = end
 
 def find_nth(string, sub, n, offset):
     start = string.find(sub, offset)
@@ -38,7 +37,8 @@
     return count
 
 def read_sents(inp, outp):
-    i = open(inp, 'r').read()
+    with open(inp, 'r') as fd:
+        i = fd.read()
     corpus = PlaintextCorpusReader(os.path.dirname(inp), os.path.basename(inp))
     sents = corpus.sents()
     print_out(outp, i, sents)