Matt Chaput  committed 33015a2

Searcher.postings() on a non-atomic searcher now builds its own MultiMatcher, giving each sub-matcher a correct, segment-specific scorer.

  • Participants
  • Parent commits c4ff3ea
  • Branches default

Comments (0)

Files changed (2)

File src/whoosh/

 from bisect import bisect_left, bisect_right
 from heapq import heapify, heapreplace, heappop, nlargest
-from whoosh import columns, fst
+from whoosh import columns, fst, scoring
 from whoosh.compat import abstractmethod
 from whoosh.compat import xrange, zip_, next, iteritems
 from whoosh.filedb.filestore import OverlayStorage
-    def postings(self, fieldname, text, scorer=None):
+    def postings(self, fieldname, text):
         """Returns a :class:`~whoosh.matching.Matcher` for the postings of the
         given term.
     def doc_frequency(self, fieldname, text):
         return sum(r.doc_frequency(fieldname, text) for r in self.readers)
-    def postings(self, fieldname, text, scorer=None):
+    def postings(self, fieldname, text):
+        # This method does not add a scorer; for that, use Searcher.postings()
         postreaders = []
         docoffsets = []
         term = (fieldname, text)
         for i, r in enumerate(self.readers):
             if term in r:
                 offset = self.doc_offsets[i]
-                # Get a posting reader for the term and add it to the list
-                pr = r.postings(fieldname, text, scorer=scorer)
+                pr = r.postings(fieldname, text)
         if not postreaders:
             raise TermNotFound(fieldname, text)
-        else:
-            return MultiMatcher(postreaders, docoffsets)
+        return MultiMatcher(postreaders, docoffsets)
     def first_id(self, fieldname, text):
         for i, r in enumerate(self.readers):

File src/whoosh/

     by the collector or the query objects to change how they operate.
-    def __init__(self, needs_current=False, weighting=None, top_query=None):
+    def __init__(self, needs_current=False, weighting=None, top_query=None,
+                 limit=0):
         :param needs_current: if True, the search requires that the matcher
             tree be "valid" and able to access information about the current
             means they should advance the matcher doc-by-doc rather than using
             shortcut methods such as all_ids().
         :param weighting: the Weighting object to use for scoring documents.
+        :param top_query: a reference to the top-level query object.
+        :param limit: the number of results requested by the user.
         self.needs_current = needs_current
         self.weighting = weighting
         self.top_query = top_query
+        self.limit = limit
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self.__dict__)
         weighting = weighting or self.weighting
-        scorer = weighting.scorer(self, fieldname, text, qf=qf)
-        return self.ixreader.postings(fieldname, text, scorer=scorer)
+        globalscorer = weighting.scorer(self, fieldname, text, qf=qf)
+        if self.is_atomic():
+            return self.ixreader.postings(fieldname, text, scorer=globalscorer)
+        else:
+            from whoosh.matching import MultiMatcher
+            matchers = []
+            docoffsets = []
+            term = (fieldname, text)
+            for subsearcher, offset in self.subsearchers:
+                r = subsearcher.reader()
+                if term in r:
+                    # Make a segment-specific scorer; the scorer should call
+                    # searcher.parent() to get global stats
+                    scorer = weighting.scorer(subsearcher, fieldname, text, qf=qf)
+                    m = r.postings(fieldname, text, scorer=scorer)
+                    matchers.append(m)
+                    docoffsets.append(offset)
+            if not matchers:
+                raise TermNotFound(fieldname, text)
+            return MultiMatcher(matchers, docoffsets, globalscorer)
     def idf(self, fieldname, text):
         """Calculates the Inverse Document Frequency of the current term (calls
         # Return the results object from the collector
         return c.results()
-    def search_with_collector(self, q, collector):
+    def search_with_collector(self, q, collector, context=None):
         """Low-level method: runs a :class:`whoosh.query.Query` object on this
         searcher using the given :class:`whoosh.collectors.Collector` object
         to collect the results::
         # Get the search context object from the searcher
-        context = self.context()
+        context = context or self.context()
         # Allow collector to set up based on the top-level information
         collector.prepare(self, q, context)