comparison g_read_sents.py @ 1:fb617586f4b2 draft

planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author stevecassidy
date Mon, 05 Dec 2016 05:22:05 -0500
parents e991d4e60c17
children a47980ef2b96
comparison
equal deleted inserted replaced
0:e991d4e60c17 1:fb617586f4b2
11 11
12 args = the_parser.parse_args() 12 args = the_parser.parse_args()
13 return args 13 return args
14 14
15 def print_out(outp, text, sentences): 15 def print_out(outp, text, sentences):
16 o = open(outp, 'w') 16 with open(outp, 'w') as output:
17 curr = 0 17 curr = 0
18 for sent in sentences: 18 for sent in sentences:
19 times = count_occurences(sent, sent[-1]) 19 times = count_occurences(sent, sent[-1])
20 curr = text.find(sent[0], curr) 20 curr = text.find(sent[0], curr)
21 end = find_nth(text, sent[-1], times, curr) + len(sent[-1]) 21 end = find_nth(text, sent[-1], times, curr) + len(sent[-1])
22 o.write(text[curr:end] + '\n') 22 output.write(text[curr:end] + '\n')
23 curr = end 23 curr = end
24 o.close()
25 24
26 def find_nth(string, sub, n, offset): 25 def find_nth(string, sub, n, offset):
27 start = string.find(sub, offset) 26 start = string.find(sub, offset)
28 while start >= 0 and n > 1: 27 while start >= 0 and n > 1:
29 start = string.find(sub, start + len(sub)) 28 start = string.find(sub, start + len(sub))
36 if string in item: 35 if string in item:
37 count += 1 36 count += 1
38 return count 37 return count
39 38
40 def read_sents(inp, outp): 39 def read_sents(inp, outp):
41 i = open(inp, 'r').read() 40 with open(inp, 'r') as fd:
41 i = fd.read()
42 corpus = PlaintextCorpusReader(os.path.dirname(inp), os.path.basename(inp)) 42 corpus = PlaintextCorpusReader(os.path.dirname(inp), os.path.basename(inp))
43 sents = corpus.sents() 43 sents = corpus.sents()
44 print_out(outp, i, sents) 44 print_out(outp, i, sents)
45 45
46 if __name__ == '__main__': 46 if __name__ == '__main__':