Commits

Steven Myint committed d2bc0b2

Split on all non-words

Previously, only a few special-case characters (like "<") were treated as
word separators. This change splits on all non-word characters instead of
just those special cases. This resolves item 3 of issue #16 in an
alternate way.

  • Participants
  • Parent commits 5d8f144

Comments (0)

Files changed (2)

File misspellings_lib.py

 import string
 
 _NORM_REGEX = re.compile('([a-z])([A-Z][a-z])')
-_WORD_REGEX = re.compile('[\s_0-9<>/,\.]+')
+_WORD_REGEX = re.compile('[\s_0-9\W]+', flags=re.UNICODE)
 
 
 def normalize(word):

File tests/test_class.py

 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # For Python 2.5
 from __future__ import with_statement
     self.assertEqual(['one', 'Two', 'Three', 'four', 'five'],
                      misspellings.split_words('oneTwoThree_four five'))
 
-    def testNormalize(self):
-      self.assertEqual('alpha', misspellings.normalize('"alpha".'))
+  def testSplitWordsWithOtherCharacters(self):
+    self.assertEqual(['the', 'big', 'cat'],
+                     misspellings.split_words('the%big$cat'))
+
+  def testNormalize(self):
+    self.assertEqual('alpha', misspellings.normalize('"alpha".'))
 
 
 if __name__ == '__main__':