nltktools: g_stemmer.py comparison

planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty

comparison

Legend: equal, deleted, inserted, replaced

-:e991d4e60c17
+:fb617586f4b2
 parser.add_argument('--stemmer', required=False, action="store", type=str, help="output file path")
 args = parser.parse_args()
 return args
 def stem_file(in_file, out_file, stemmer_type):
-unsegmented = unicode(open(in_file, 'r').read(), errors='ignore')
+with open(in_file, 'r') as fd:
-output = open(out_file, 'w')
+unsegmented = fd.read()
-sentences = nltk.sent_tokenize(unsegmented)
-stemmer = get_stemmer(stemmer_type)
+with open(out_file, 'w') as output:
-for sentence in sentences:
+sentences = nltk.sent_tokenize(unsegmented)
-words = nltk.word_tokenize(sentence)
+stemmer = get_stemmer(stemmer_type)
-for word in words:
+for sentence in sentences:
-stemmed_word = stemmer.stem(word)
+words = nltk.word_tokenize(sentence)
-output.write(stemmed_word)
+for word in words:
-output.write('\n')
+stemmed_word = stemmer.stem(word)
-output.close()
+output.write(stemmed_word)
+output.write('\n')
 def get_stemmer(stemmer_type):
 if stemmer_type == 'lancaster':
 stemmer = LancasterStemmer()
 elif stemmer_type == 'porter':

Mercurial > repos > stevecassidy > nltktools