Mercurial > repos > stevecassidy > nltktools

diff g_pos.py @ 1:fb617586f4b2 draft
planemo upload commit a81826fe44f09a3710a35c183aa88b745aeec064-dirty
author: stevecassidy
date: Mon, 05 Dec 2016 05:22:05 -0500
parents: e991d4e60c17
children: a47980ef2b96
--- a/g_pos.py	Wed Oct 12 22:17:53 2016 -0400
+++ b/g_pos.py	Mon Dec 05 05:22:05 2016 -0500
@@ -8,27 +8,27 @@
     parser.add_argument('--output', required=True,  action="store", type=str, help="output file path")
     args = parser.parse_args()
     return args
-  
+
 
 def postag(in_file, out_file):
     """Input: a text file with one token per line
     Output: a version of the text with Part of Speech tags written as word/TAG
     """
-    text = unicode(open(in_file, 'r').read(), errors='ignore')
+    with open(in_file, 'r') as fd:
+        text = fd.read()
+
     sentences = nltk.sent_tokenize(text)
-    output = open(out_file, 'w')
-    for sentence in sentences:
-        tokens = nltk.word_tokenize(sentence)
-        postags = nltk.pos_tag(tokens)
-        for postag in postags:
-            # print postag
-            output.write("%s/%s " % postag)
-    output.write('\n')
-    output.close()
+    
+    with open(out_file, 'w') as output:
+        for sentence in sentences:
+            tokens = nltk.word_tokenize(sentence)
+            postags = nltk.pos_tag(tokens)
+            for postag in postags:
+                # print postag
+                output.write("%s/%s " % postag)
+        output.write('\n')
 
 
 if __name__ == '__main__':
     args = arguments()
     postag(args.input, args.output)
-    
-    
\ No newline at end of file