1. Alexei Gousev
  2. whoosh

Commits

Matt Chaput  committed 5834e01

WeightLengthScorer wasn't overriding supports_block_quality().
Fixed up matcher replacement code a bit.
Fixed string/bytes problem in filetables.

  • Participants
  • Parent commits ffa9271
  • Branches betterq

Comments (0)

Files changed (3)

File src/whoosh/filedb/filetables.py

View file
 
     @classmethod
     def from_string(cls, s):
-        hbyte = ord(s[0])
+        hbyte = ord(s[0:1])
         if hbyte < 2:
             # Freq, Doc freq, min length, max length, max weight, max WOL
             f, df, ml, xl, xw, xwol = cls.struct.unpack(s[1:cls.struct.size+1])

File src/whoosh/scoring.py

View file
                                      searcher.min_length(fieldname, text))
         return obj
     
+    def supports_block_quality(self):
+        return True
+    
     def score(self, matcher):
         return self._score(matcher.weight(), self.dfl(matcher.id()))
     

File src/whoosh/searching.py

View file
         
 
 class Collector(object):
-    def __init__(self, limit=10, usequality=True, replace=5, groupedby=None,
+    def __init__(self, limit=10, usequality=True, replace=10, groupedby=None,
                  timelimit=None, greedy=False, reverse=False):
         """A Collector finds the matching documents, scores them, collects them
         into a list, and produces a Results object from them.
         replacecounter = 0
         timelimited = bool(self.timelimit)
         
-        # If we're replacing, do one at the beginning to start with the most
-        # efficient matcher possible
-        if replace:
-            matcher = matcher.replace(minscore or 0)
-        
         # A flag to indicate whether we should check block quality at the start
         # of the next loop
         checkquality = True
         
         while matcher.is_active():
+            # If the replacement counter has reached 0, try replacing the
+            # matcher with a more efficient version
+            if replace:
+                if replacecounter == 0 or self.minscore != minscore:
+                    matcher = matcher.replace(minscore or 0)
+                    if not matcher.is_active():
+                        break
+                    replacecounter = replace
+                    minscore = self.minscore
+                replacecounter -= 1
+            
             # Check whether the time limit expired since the last match
             if timelimited and self.timedout and not self.greedy:
                 raise TimeLimit
             # flag is true, try to skip ahead to the next block with the
             # minimum required quality
             if usequality and checkquality and minscore is not None:
-                matcher.skip_to_quality(minscore)
+                skipped = matcher.skip_to_quality(minscore)
+                #print "skipped=", skipped
                 # Skipping ahead might have moved the matcher to the end of the
                 # posting list
                 if not matcher.is_active():
             if self.timedout:
                 raise TimeLimit
             
-            # The method that called us might have changed self.minscore, so
-            # read it again
-            minscore = self.minscore
-            
             # Move to the next document. This method returns True if the
             # matcher has entered a new block, so we should check block quality
             # again.
             checkquality = matcher.next()
             
-            # Ask the matcher to replace itself with a more efficient version
-            # if possible
-            if replace and matcher.is_active():
-                replacecounter += 1
-                if replacecounter >= replace:
-                    matcher = matcher.replace(minscore or 0)
-                    replacecounter = 0
                     
     def results(self):
         """Returns the current results from the collector. This is useful for