Matt Chaput committed dcae935

Made ArrayUnionMatcher more efficient when skipping.

Comments (0)

Files changed (2)

src/whoosh/matching/combo.py

         self._partsize = partsize
 
         self._a = array("f", (0 for _ in xrange(self._partsize)))
-        self._docnum = 0
-        self._offset = 0
-        self._limit = 0
+        self._docnum = self._min_id()
         self._read_part()
-        self._find_next()
 
     def __repr__(self):
         return ("%s(%r, boost=%f, scored=%r, partsize=%d)"
                 % (self.__class__.__name__, self._submatchers, self._boost,
                    self._scored, self._partsize))
 
+    def _min_id(self):
+        return min(subm.id() for subm in self._submatchers if subm.is_active())
+
     def _read_part(self):
         scored = self._scored
         boost = self._boost
-        limit = min(self._limit + self._partsize, self._doccount)
-        offset = self._limit
+        limit = min(self._docnum + self._partsize, self._doccount)
+        offset = self._docnum
         a = self._a
 
         # Clear the array
                 m.next()
 
         self._offset = offset
-        self._docnum = offset
         self._limit = limit
 
     def _find_next(self):
         a = self._a
-        doccount = self._doccount
+        docnum = self._docnum
         offset = self._offset
         limit = self._limit
 
-        while self._docnum < doccount:
-            dn = self._docnum
-            if dn == limit:
-                self._read_part()
-                limit = self._limit
-                offset = self._offset
-            elif a[dn - offset] <= 0.0:
-                self._docnum += 1
-            else:
+        while docnum < limit:
+            if a[docnum - offset] > 0:
                 break
+            docnum += 1
+
+        if docnum == limit:
+            self._docnum = self._min_id()
+            self._read_part()
+        else:
+            self._docnum = docnum
 
     def is_active(self):
         return self._docnum < self._doccount
 
     def skip_to(self, docnum):
         if docnum < self._offset:
+            # We've already passed it
+            return
+        elif docnum < self._limit:
+            # It's in the current part
+            self._docnum = docnum
+            self._find_next()
             return
 
-        while docnum >= self._limit:
+        # Advance all submatchers
+        submatchers = self._submatchers
+        active = False
+        for subm in submatchers:
+            subm.skip_to(docnum)
+            active = active or subm.is_active()
+
+        if active:
+            # Rebuffer
+            self._docnum = self._min_id()
             self._read_part()
-        self._docnum = docnum
-        self._find_next()
+        else:
+            self._docnum = self._doccount
 
     def skip_to_quality(self, minquality):
         skipped = 0
         while self.block_quality() <= minquality:
             skipped += 1
+            self._docnum = self._limit
             self._read_part()
         self._find_next()
         return skipped
 
             docnum += 1
             if docnum == limit:
+                self._docnum = docnum
                 self._read_part()
                 offset = self._offset
                 limit = self._limit

tests/test_matching.py

         assert_equal(r.scored_length(), 39)  # Number of docs in the results
 
 
+def test_arrayunion():
+    l1 = matching.ListMatcher([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
+    l2 = matching.ListMatcher([100, 200, 300, 400, 500, 600])
+    aum = matching.ArrayUnionMatcher([l1, l2], 600, partsize=5)
+    assert_equal(aum.id(), 10)
+    aum.skip_to(45)
+    assert_equal(aum.id(), 50)
+    aum.skip_to(550)
+    assert_equal(aum.id(), 600)
 
+
+
+
+
+
+
+
+
+
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.