Commits

Matt Chaput committed 2ece731

Added a test and a fix for skip_to() on NestedChildMatcher.

Comments (0)

Files changed (2)

src/whoosh/query/nested.py

                                        boost=self.boost)
 
     class NestedChildMatcher(matching.WrappingMatcher):
-        def __init__(self, comb, m, limit, is_deleted, boost=1.0):
-            self.comb = comb
-            self.child = m
+        def __init__(self, parent_comb, wanted_parent_matcher, limit,
+                     is_deleted, boost=1.0):
+            self.parent_comb = parent_comb
+            self.wanted_parent_matcher = wanted_parent_matcher
             self.limit = limit
             self.is_deleted = is_deleted
             self.boost = boost
-            self._reset()
+            self._nextchild = -1
+            self._nextparent = -1
+            self._find_next_children()
 
         def __repr__(self):
-            return "%s(%r, %r)" % (self.__class__.__name__, self.comb,
-                                   self.child)
+            return "%s(%r, %r)" % (self.__class__.__name__,
+                                   self.parent_comb,
+                                   self.wanted_parent_matcher)
 
         def reset(self):
             self.child.reset()
             return self
 
         def _find_next_children(self):
-            comb = self.comb
-            m = self.child
+            # "comb" contains the doc IDs of all parent documents
+            comb = self.parent_comb
+            # "m" is the matcher for "wanted" parents
+            m = self.wanted_parent_matcher
+            # Last doc ID + 1
             limit = self.limit
+            # A function that returns True if a doc ID is deleted
             is_deleted = self.is_deleted
             nextchild = self._nextchild
             nextparent = self._nextparent
                 self._find_next_children()
 
         def skip_to(self, docid):
+            comb = self.parent_comb
+            wpm = self.wanted_parent_matcher
+
+            # self._nextchild is the "current" matching child ID
             if docid <= self._nextchild:
                 return
 
-            m = self.child
-            if not m.is_active() or docid < m.id():
-                # We've already read-ahead past the desired doc, so iterate
-                while self.is_active() and self._nextchild < docid:
+            # self._nextparent is the next parent ID (matching or not)
+            if docid < self._nextparent:
+                # Just iterate
+                while self.is_active() and self.id() < docid:
                     self.next()
-            elif m.is_active():
-                # The child is active and hasn't read-ahead to the desired doc
-                # yet, so skip to it and re-find
-                m.skip_to(docid)
-                self._find_next_children()
             else:
-                # Go inactive
-                self._nextchild = self.limit
+                # Find the parent before the target ID
+                pid = comb.before(docid)
+                # Skip the parent matcher to that ID
+                wpm.skip_to(pid)
+                # If that made the matcher inactive, then we're done
+                if not wpm.is_active():
+                    self._nextchild = self._nextparent = self.limit
+                else:
+                    # Reestablish for the next child after the next matching
+                    # parent
+                    self._find_next_children()
 
         def value(self):
             raise NotImplementedError(self.__class__)

tests/test_nested.py

 from __future__ import with_statement
 
-from whoosh import fields, query, sorting
+from whoosh import fields, qparser, query, sorting
 from whoosh.compat import u
 from whoosh.filedb.filestore import RamStorage
+from whoosh.util.testing import TempIndex
 
 
 def test_nested_parent():
         f = sorting.StoredFieldFacet("song_name")
         r = s.search(zq, sortedby=f)
         assert [hit["track"] for hit in r] == [3, 2, 1]
+
+
+def test_nested_skip():  # NestedChildren alone, a plain And, then both combined
+    schema = fields.Schema(
+        id=fields.ID(unique=True, stored=True),
+        name=fields.TEXT(stored=True),
+        name_ngrams=fields.NGRAMWORDS(minsize=4, field_boost=1.2),
+        type=fields.TEXT,
+    )
+
+    domain = [  # (id, name, type): one "book" doc, then four "chapter" docs
+        ("book_1", "The Dark Knight Returns", "book"),
+        ("chapter_1", "The Dark Knight Returns", "chapter"),
+        ("chapter_2", "The Dark Knight Triumphant", "chapter"),
+        ("chapter_3", "Hunt the Dark Knight", "chapter"),
+        ("chapter_4", "The Dark Knight Falls", "chapter")
+    ]
+
+    with TempIndex(schema) as ix:
+        with ix.writer() as w:
+            for id, name, typ in domain:  # documents are added in list order
+                w.add_document(id=id, name=name, name_ngrams=name, type=typ)
+
+        with ix.searcher() as s:
+            all_parents = query.Term("type", "book")  # every parent doc
+            wanted_parents = query.Term("name", "dark")  # parents to expand
+            children_of_wanted_parents = query.NestedChildren(all_parents,
+                                                              wanted_parents)
+
+            r1 = s.search(children_of_wanted_parents)  # expect all 4 chapters
+            assert r1.scored_length() == 4
+            assert [hit["id"] for hit in r1] == ["chapter_1", "chapter_2",
+                                                 "chapter_3", "chapter_4"]
+
+            wanted_children = query.And([query.Term("type", "chapter"),
+                                         query.Term("name", "hunt")])
+
+            r2 = s.search(wanted_children)  # only chapter_3 contains "hunt"
+            assert r2.scored_length() == 1
+            assert [hit["id"] for hit in r2] == ["chapter_3"]
+
+            complex_query = query.And([children_of_wanted_parents,
+                                       wanted_children])
+
+            r3 = s.search(complex_query)  # presumably drives skip_to(); confirm
+            assert r3.scored_length() == 1
+            assert [hit["id"] for hit in r3] == ["chapter_3"]