Mercurial > repos > stevecassidy > nltktools
diff g_stemmer.py @ 2:a47980ef2b96 draft
planemo upload for repository https://github.com/Alveo/alveo-galaxy-tools commit b5b26e9118f2ad8af109d606746b39a5588f0511-dirty
author | stevecassidy |
---|---|
date | Wed, 01 Nov 2017 01:19:55 -0400 |
parents | fb617586f4b2 |
children |
line wrap: on
line diff
--- a/g_stemmer.py Mon Dec 05 05:22:05 2016 -0500
+++ b/g_stemmer.py Wed Nov 01 01:19:55 2017 -0400
@@ -1,7 +1,5 @@
-import sys
-import os
 import nltk
-from nltk.stem import *
+from nltk.stem import PorterStemmer, LancasterStemmer, snowball
 import argparse
 
 
@@ -13,6 +11,7 @@
     args = parser.parse_args()
     return args
 
+
 def stem_file(in_file, out_file, stemmer_type):
     with open(in_file, 'r') as fd:
         unsegmented = fd.read()
@@ -27,6 +26,7 @@
                 output.write(stemmed_word)
                 output.write('\n')
 
+
 def get_stemmer(stemmer_type):
     if stemmer_type == 'lancaster':
         stemmer = LancasterStemmer()
@@ -36,6 +36,7 @@
         stemmer = snowball.EnglishStemmer()
     return stemmer
 
+
 if __name__ == '__main__':
     args = arguments()
     stem_file(args.input, args.output, args.stemmer)