Commits

Matt Chaput committed 31a721c

Changed matchers to store scores as doubles, so stored scores match "live" ones.
Changed Preload matcher to not store 0s for documents outside the matching range.

Comments (0)

Files changed (2)

src/whoosh/matching/combo.py

     """Instead of marching the sub-matchers along in parallel, this
     matcher pre-reads the scores for EVERY MATCHING DOCUMENT, trading memory
     for speed.
+    
+    This is faster than the implementation using a binary tree of
+    :class:`~whoosh.matching.binary.UnionMatcher` objects (possibly just
+    because of less overhead), but it doesn't allow getting information about
+    the "current" document other than the score, because there isn't really a
+    current document, just an array of scores.
     """
 
     def __init__(self, submatchers, doccount, boost=1.0, scored=True):
         CombinationMatcher.__init__(self, submatchers, boost=boost)
 
         self._doccount = doccount
+
+        a = array("d")
         active = [subm for subm in self._submatchers if subm.is_active()]
-        if not active:
-            self._docnum = doccount
+        if active:
+            offset = self._docnum = min(m.id() for m in active)
+            for m in active:
+                while m.is_active():
+                    if scored:
+                        score = m.score() * boost
+                    else:
+                        score = boost
+
+                    docnum = m.id()
+                    place = docnum - offset
+                    if len(a) <= place:
+                        a.extend(0 for _ in xrange(place - len(a) + 1))
+                    a[place] += score
+                    m.next()
+            self._a = a
+            self._offset = offset
         else:
-            self._docnum = min(m.id() for m in active)
-
-        a = array("f", (0 for _ in xrange(doccount)))
-        for m in submatchers:
-            while m.is_active():
-                if scored:
-                    score = m.score() * boost
-                else:
-                    score = boost
-                a[m.id()] = score
-                m.next()
+            self._docnum = 0
+            self._offset = 0
         self._a = a
 
     def is_active(self):
-        return self._docnum < self._doccount
+        return self._docnum - self._offset < len(self._a)
 
     def id(self):
         return self._docnum
 
     def score(self):
-        return self._a[self._docnum]
+        return self._a[self._docnum - self._offset]
 
     def next(self):
         a = self._a
-        doccount = self._doccount
-        docnum = self._docnum
+        offset = self._offset
+        place = self._docnum - offset
 
-        docnum += 1
-        while docnum < doccount and a[docnum] == 0:
-            docnum += 1
-        self._docnum = docnum
+        place += 1
+        while place < len(a) and a[place] == 0:
+            place += 1
+        self._docnum = place + offset
 
     def max_quality(self):
-        return max(self._a)
+        return max(self._a[self._docnum - self._offset:])
 
     def block_quality(self):
-        return max(self._a)
+        return self.max_quality()
 
     def skip_to(self, docnum):
-        a = self._a
-        doccount = self._doccount
-        while docnum < doccount and a[docnum] == 0:
-            docnum += 1
+        if docnum < self._docnum:
+            return
+
         self._docnum = docnum
+        if self._a[docnum - self._offset] == 0:
+            self.next()
 
     def skip_to_quality(self, minquality):
         a = self._a
-        docnum = self._docnum
-        doccount = self._doccount
+        offset = self._offset
+        place = self._docnum - offset
 
         skipped = 0
-        while docnum < doccount and a[docnum] <= minquality:
-            docnum += 1
+        while place < len(a) and a[place] <= minquality:
+            place += 1
             skipped = 1
 
+        self._docnum = place + offset
         return skipped
 
     def all_ids(self):
         a = self._a
-        docnum = self._docnum
-        doccount = self._doccount
-        while docnum < doccount:
-            if a[docnum] > 0:
-                yield docnum
-            docnum += 1
+        offset = self._offset
+        place = self._docnum - offset
+
+        while place < len(a):
+            if a[place] > 0:
+                yield place + offset
+            place += 1
 
 
 class ArrayUnionMatcher(CombinationMatcher):
             partsize = doccount
         self._partsize = partsize
 
-        self._a = array("f", (0 for _ in xrange(self._partsize)))
+        self._a = array("d", (0 for _ in xrange(self._partsize)))
         self._docnum = self._min_id()
         self._read_part()
 

tests/test_searching.py

 from datetime import datetime, timedelta
 
 from nose.tools import assert_equal, assert_raises  # @UnresolvedImport
+from nose.tools import assert_almost_equal  # @UnresolvedImport
 
 from whoosh import analysis, fields, index, qparser, query, searching, scoring
-from whoosh.compat import u, xrange, text_type, permutations
+from whoosh.compat import u, text_type
+from whoosh.compat import xrange, permutations, izip_longest
 from whoosh.filedb.filestore import RamStorage
 
 
             q.binary_matcher = True
             r2 = [(hit.docnum, hit.score) for hit in s.search(q, limit=None)]
 
-            assert_equal(r1, r2)
+            for item1, item2 in izip_longest(r1, r2):
+                assert_equal(item1[0], item2[0])
+                assert_equal(item1[1], item2[1])
 
 
 def test_not():
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.