Source

Kiva Editor's Assistant / kea.py

Diff from to
 import codecs
 import logging
 import sys
+import re
 
 from clipboard import get_clipboard_text, set_clipboard_text
-from mytoken import Token
+from keatoken import Token
 import rules
 import tagger
-import myparser
+import keaparser
 
 
 class Sentence(object):
         sentence_idx = self.start
         match_index = 0
         while sentence_idx < self.end:
-            if self.ea.tokens[sentence_idx].str != match_list[match_index]:
+            if not re.match(match_list[match_index],
+                            self.ea.tokens[sentence_idx].str):
                 match_index = 0
             else:
                 match_index += 1
         self.tokens = [Token(self._original_text, 0,
                               len(self._original_text)), eof_token]
         self.sentences = []
-        self._parser = myparser.Parser()
+        self._parser = keaparser.Parser()
         # apply first phase rules to replace the original Token object
         # with multiple Token objects, one for each bit of the input
         # text that qualifies as a single input token.
 
             if append_space:
                 self.edited_text += u' '
+        self.edited_text = self.edited_text.strip()
 
     def _parse_sentences(self):
         # for each range of tokens representing a sentence, generate a
     def dump_pos_tags(self):
         """Write every token with a Part-Of-Speech tag to stdout."""
         for token in self.tokens:
-            if hasattr(token, 'pos') and token.pos:
+            if token.pos:
                 if len(token.pos) > 1:
                     sys.stdout.write(token.str + u'/[')
                     first_element = True
                         if not first_element:
                             sys.stdout.write(', ')
                         sys.stdout.write(postag.pos)
-                    sys.stdout.write(']\n')
+                        first_element = False
+                    sys.stdout.write('] ')
                 else:
-                    print u'{}/{}'.format(token.str, token.pos[0]),
+                    sys.stdout.write(u'{}/{} '.format(token.str, token.pos[0].pos))
             if token.str == '\n':
-                print
+                sys.stdout.write('\n')
+        if self.tokens:
+            sys.stdout.write('\n')
 
 
 def parse_commandline():