Commits

Doug Hellmann committed 975e8f4

add better contraction support

  • Parent commits 2c2be9a

Comments (0)

Files changed (1)

File spelling/sphinxcontrib/spelling.py

 from sphinx.util.nodes import inline_all_toctrees
 
 import enchant
-from enchant.tokenize import (get_tokenizer,
+from enchant.tokenize import (get_tokenizer, tokenize,
                               Filter, EmailFilter, WikiWordFilter,
                               unit_tokenize, wrap_tokenizer,
                               )
 
+class list_tokenize(tokenize):
+    def __init__(self, words):
+        tokenize.__init__(self, '')
+        self._words = words
+    def next(self):
+        if not self._words:
+            raise StopIteration()
+        word = self._words.pop(0)
+        return (word, 0)
+
 class ContractionFilter(Filter):
     """Strip common contractions from words.
     """
+    splits = {
+        "won't":['will', 'not'],
+        "isn't":['is', 'not'],
+        "can't":['can', 'not'],
+        "i'm":['I', 'am'],
+        }
     def _split(self, word):
+        # Fixed responses
+        if word.lower() in self.splits:
+            return list_tokenize(self.splits[word.lower()])
+
         # Possessive
         if word.lower().endswith("'s"):
             return unit_tokenize(word[:-2])
 
         # * not
-        if word.lower() == "won't":
-            return unit_tokenize(word[0])
         if word.lower().endswith("n't"):
             return unit_tokenize(word[:-3])
 
-        # I am
-        if word.lower() == "i'm":
-            return unit_tokenize(word[0])
         return unit_tokenize(word)
 
 class IgnoreWordsFilter(Filter):
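
For reference, a minimal usage sketch (not part of this commit) of the new ContractionFilter wrapped around pyenchant's tokenizer pipeline. The language tag and the sphinxcontrib.spelling import path are assumptions based on the file shown above.

 from enchant.tokenize import get_tokenizer
 from sphinxcontrib.spelling import ContractionFilter  # assumed import path

 # Filters are passed as classes; get_tokenizer wraps the base word tokenizer
 # with each of them in turn.
 tokenizer = get_tokenizer('en_US', filters=[ContractionFilter])

 # Contractions are expanded before spell checking, so the checker sees only
 # real words. This should yield roughly:
 #   ['It', 'will', 'not', 'work', 'I', 'am', 'sure']
 tokens = [word for word, _pos in tokenizer("It won't work, I'm sure")]
 print(tokens)

The fixed responses in the splits table cover irregular cases such as "won't" and "i'm"; anything else ending in "n't" or "'s" falls through to the generic suffix-stripping rules in _split().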