Commits

Matt Chaput committed 52f6c0b

Fixed a bug in IntraWordFilter that occurred when a word consists entirely of delimiters (e.g. ":-(").
Fixes issue #166.

  • Participants
  • Parent commits acf42a8

Comments (0)

Files changed (2)

src/whoosh/analysis.py

         # For each run between 's
         for sc, ec in dispos:
             # Split on boundary characters
+            found = False
             for part_match in self.between.finditer(string, sc, ec):
+                found = True
                 part_start = part_match.start()
                 part_end = part_match.end()
                 
                 else:
                     # Not splitting on transitions, just yield the part
                     yield (part_start, part_end)
-    
+            
     def _merge(self, parts):
         mergewords = self.mergewords
         mergenums = self.mergenums
                         t.endchar = base + endchar
                     yield t
                 
-                # Set the new position counter based on the last part
-                newpos = parts[-1][1] + 1
+                if parts:
+                    # Set the new position counter based on the last part
+                    newpos = parts[-1][1] + 1
 
 
 class BiWordFilter(Filter):

tests/test_analysis.py

     assert_equal(" ".join([t.text for t in ts]), "A B C D")
     assert_equal([t.pos for t in ts], [3, 4, 5, 6])
 
+def test_frowny_face():
+    # See https://bitbucket.org/mchaput/whoosh/issue/166/
+    ana = analysis.RegexTokenizer(r"\S+") | analysis.IntraWordFilter()
+    # text is all delimiters
+    tokens = [t.text for t in ana(u(":-("))]
+    assert_equal(tokens, [])
+    
 
 
 
 
 
 
-