Commits

Matt Chaput committed 1989a7e

Fixed scorer handling of sub-searchers. Fixes issue #183.

Comments (0)

Files changed (5)

src/whoosh/scoring.py

 This module contains classes for scoring (and sorting) search results.
 """
 
-from __future__ import division
+from __future__ import division, print_function
 from math import log, pi
 
 from whoosh.compat import iteritems
         raise NotImplementedError(self.__class__.__name__)
 
 
-# WeightScorer
+# Scorer that just returns term weight
 
 class WeightScorer(BaseScorer):
     """A scorer that simply returns the weight as the score. This is useful
     def block_quality(self, matcher):
         return matcher.block_max_weight()
 
+    @classmethod
+    def for_(cls, searcher, fieldname, text):
+        ti = searcher.term_info(fieldname, text)
+        return cls(ti.max_weight())
+
+
+# Base scorer for models that only use weight and field length
 
 class WeightLengthScorer(BaseScorer):
     """Base class for scorers where the only per-document variables are term
     weight and field length.
     
-    Subclasses should follow this pattern:
-    
-    * Initializer should take ``searcher, fieldname, text`` as the first three
-      arguments. Additional arguments (such as tuning parameters) can be passed
-      after these.
-      
-    * Override the ``_score(weight, length)`` method to return the score for a
-      document with the given weight and length.
-      
-    * Create scorers by calling the ``using`` class method. This method creates
-      the scorer object and does routine setup of attributes before returning
-      it.
-    
-    >>> scorer = BM25FScorer.using(searcher, "text", "render", B=0.5)
-    >>> scorer.max_weight
-    1.25840242
+    Subclasses should override the ``_score(weight, length)`` method to return
+    the score for a document with the given weight and length, and call the
+    ``setup()`` method at the end of the initializer to set up common
+    attributes.
     """
 
-    @classmethod
-    def using(cls, searcher, fieldname, text, *args, **kwargs):
-        """This class method is meant to be called to create and configure a
-        new scorer. It initializes the scorer and then does the busy work of
+    def setup(self, searcher, fieldname, text):
+        """Initializes the scorer and then does the busy work of
         adding the ``dfl()`` function and ``max_quality`` attributes.
         
         This method assumes the initializers of WeightLengthScorer subclasses
-        always take ``searcher, fieldname, text`` as the first three arguments.
-        Any additional arguments given to this method are passed through to the
-        initializer.
+        always take ``searcher, fieldname, text`` as the first three
+        arguments. Any additional arguments given to this method are passed
+        through to the initializer.
         
-        >>> w = BM25
+        Note: this method calls ``self._score()``, so you should only call it
+        in the initializer after setting up whatever attributes ``_score()``
+        depends on::
+        
+            class MyScorer(WeightLengthScorer):
+                def __init__(self, searcher, fieldname, text, parm=1.0):
+                    self.parm = parm
+                    self.setup(searcher, fieldname, text)
+                
+                def _score(self, weight, length):
+                    return (weight / (length + 1)) * self.parm
         """
 
         ti = searcher.term_info(fieldname, text)
-
         if not searcher.schema[fieldname].scorable:
             return WeightScorer(ti.max_weight())
 
-        obj = cls(searcher, fieldname, text, *args, **kwargs)
-        obj.dfl = lambda docid: searcher.doc_field_length(docid, fieldname, 1)
-        obj.max_quality = obj._score(ti.max_weight(), ti.min_length())
-        return obj
+        self.dfl = lambda docid: searcher.doc_field_length(docid, fieldname, 1)
+        self.max_quality = self._score(ti.max_weight(), ti.min_length())
 
     def supports_block_quality(self):
         return True
 
     def score(self, matcher):
+        print("matcher=", matcher, "id=", matcher.id(), "weight=", matcher.weight(), "len=", self.dfl(matcher.id()))
         return self._score(matcher.weight(), self.dfl(matcher.id()))
 
     def block_quality(self, matcher):
 
 # WeightingModel implementations
 
+# Debugging model
+
+class DebugModel(WeightingModel):
+    def __init__(self):
+        self.log = []
+
+    def scorer(self, searcher, fieldname, text, qf=1):
+        return DebugScorer(searcher, fieldname, text, self.log)
+
+
+class DebugScorer(BaseScorer):
+    def __init__(self, searcher, fieldname, text, log):
+        ti = searcher.term_info(fieldname, text)
+        self.max_quality = ti.max_weight()
+
+        self.searcher = searcher
+        self.fieldname = fieldname
+        self.text = text
+        self.log = log
+
+    def supports_block_quality(self):
+        return True
+
+    def score(self, matcher):
+        fieldname, text = self.fieldname, self.text
+        docid = matcher.id()
+        w = matcher.weight()
+        length = self.searcher.doc_field_length(docid, fieldname)
+        self.log.append((fieldname, text, docid, w, length))
+        return w
+
+    def block_quality(self, matcher):
+        return matcher.block_max_weight()
+
 
 # BM25F Model
 
     # fl - field length in the current document
     # avgfl - average field length across documents in collection
     # B, K1 - free parameters
-    score = idf * ((tf * K1 + 1) / (tf + K1 * (1 - B + B * fl / avgfl)))
-    return score
+
+    return idf * ((tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (fl / avgfl))))
 
 
 class BM25F(WeightingModel):
         return True
 
     def scorer(self, searcher, fieldname, text, qf=1):
+        if not searcher.schema[fieldname].scorable:
+            return WeightScorer.for_(searcher, fieldname, text)
+
         if fieldname in self._field_B:
             B = self._field_B[fieldname]
         else:
             B = self.B
 
-        return BM25FScorer.using(searcher, fieldname, text, B, self.K1, qf=qf)
+        return BM25FScorer(searcher, fieldname, text, B, self.K1, qf=qf)
 
 
 class BM25FScorer(WeightLengthScorer):
     def __init__(self, searcher, fieldname, text, B, K1, qf=1):
-        self.idf = searcher.idf(fieldname, text)
-        self.avgfl = searcher.avg_field_length(fieldname) or 1
+        parent = searcher.get_parent()
+        self.idf = parent.idf(fieldname, text)
+        self.avgfl = parent.avg_field_length(fieldname) or 1
         self.B = B
         self.K1 = K1
         self.qf = qf
+        self.setup(searcher, fieldname, text)
 
     def _score(self, weight, length):
-        return bm25(self.idf, weight, length, self.avgfl, self.B, self.K1)
+        s = bm25(self.idf, weight, length, self.avgfl, self.B, self.K1)
+        return s
 
 
 # DFree model
         return True
 
     def scorer(self, searcher, fieldname, text, qf=1):
-        return DFreeScorer.using(searcher, fieldname, text, qf=qf)
+        if not searcher.schema[fieldname].scorable:
+            return WeightScorer.for_(searcher, fieldname, text)
+
+        return DFreeScorer(searcher, fieldname, text, qf=qf)
 
 
 class DFreeScorer(WeightLengthScorer):
     def __init__(self, searcher, fieldname, text, qf=1):
-        self.cf = searcher.weight(fieldname, text)
-        self.fl = searcher.field_length(fieldname)
+        parent = searcher.get_parent()
+        self.cf = parent.weight(fieldname, text)
+        self.fl = parent.field_length(fieldname)
         self.qf = qf
+        self.setup(searcher, fieldname, text)
 
     def _score(self, weight, length):
         return dfree(weight, self.cf, self.qf, length, self.fl)
 
     def scorer(self, searcher, fieldname, text, qf=1):
         if not searcher.schema[fieldname].scorable:
-            return WeightScorer(searcher.max_weight(fieldname, text))
+            return WeightScorer.for_(searcher, fieldname, text)
 
-        return PL2Scorer.using(searcher, fieldname, text, self.c, qf=qf)
+        return PL2Scorer(searcher, fieldname, text, self.c, qf=qf)
 
 
 class PL2Scorer(WeightLengthScorer):
     def __init__(self, searcher, fieldname, text, c, qf=1):
-        self.cf = searcher.frequency(fieldname, text)
-        self.dc = searcher.doc_count_all()
-        self.avgfl = searcher.avg_field_length(fieldname) or 1
+        parent = searcher.get_parent()
+        self.cf = parent.frequency(fieldname, text)
+        self.dc = parent.doc_count_all()
+        self.avgfl = parent.avg_field_length(fieldname) or 1
         self.c = c
         self.qf = qf
+        self.setup(searcher, fieldname, text)
 
     def _score(self, weight, length):
         return pl2(weight, self.cf, self.qf, self.dc, length, self.avgfl,
 
 class TF_IDF(WeightingModel):
     def scorer(self, searcher, fieldname, text, qf=1):
-        idf = searcher.idf(fieldname, text)
+        parent = searcher.get_parent()
+        idf = parent.idf(fieldname, text)
         maxweight = searcher.term_info(fieldname, text).max_weight()
-        return TF_IDF.TF_IDFScorer(maxweight, idf)
+        return TF_IDFScorer(maxweight, idf)
 
-    class TF_IDFScorer(BaseScorer):
-        def __init__(self, maxweight, idf):
-            self.max_quality = maxweight * idf
-            self.idf = idf
 
-        def supports_block_quality(self):
-            return True
+class TF_IDFScorer(BaseScorer):
+    def __init__(self, maxweight, idf):
+        self.max_quality = maxweight * idf
+        self.idf = idf
 
-        def score(self, matcher):
-            return matcher.weight() * self.idf
+    def supports_block_quality(self):
+        return True
 
-        def block_quality(self, matcher):
-            return matcher.block_maxweight() * self.idf
+    def score(self, matcher):
+        return matcher.weight() * self.idf
+
+    def block_quality(self, matcher):
+        return matcher.block_maxweight() * self.idf
 
 
 # Utility models

src/whoosh/searching.py

         return self.__class__(reader, fromindex=self._ix,
                               weighting=self.weighting, parent=self)
 
+    def _offset_for_subsearcher(self, subsearcher):
+        for ss, offset in self.subsearchers:
+            if ss is subsearcher:
+                return offset
+
     def is_atomic(self):
         return self.reader().is_atomic()
 
+    def has_parent(self):
+        return self.parent is not None
+
+    def get_parent(self):
+        """Returns the parent of this searcher (if has_parent() is True), or
+        else self.
+        """
+
+        if self.has_parent():
+            return self.parent()
+        else:
+            return self
+
     def doc_count(self):
         """Returns the number of UNDELETED documents in the index.
         """
             # on an empty index.
             return None
 
-        s = self.parent() if self.parent else self
-        return self.weighting.scorer(s, fieldname, text, qf=qf)
+        return self.weighting.scorer(self, fieldname, text, qf=qf)
 
     def postings(self, fieldname, text, qf=1):
         """Returns a :class:`whoosh.matching.Matcher` for the postings of the

src/whoosh/writing.py

         
             writer.add_document(a="alfa", b="bravo", c="charlie",
                                 _boost=2.0, _c_boost=3.0)
-            
+        
+        Note that some scoring algorithms, including Whoosh's default BM25F,
+        do not work with term weights less than 1, so you should generally not
+        use a boost factor less than 1.
+        
         See also :meth:`Writer.update_document`.
         """
 

tests/test_indexing.py

                       quick=fields.NGRAM,
                       note=fields.STORED)
     st = RamStorage()
-    
+
     ix = st.create_index(s)
     w = ix.writer()
     w.add_document(title=u("First"), content=u("This is the first document"), path=u("/a"),
         w.add_document(id=u("2"))
         w.add_document(id=u("3"))
         w.commit()
-        
+
         w = ix.writer()
         w.commit()
 
                 docs[word].append(i)
             w.add_document(text=u(" ").join(smp), id=i)
         w.commit()
-        
+
         with ix.searcher() as s:
             for word in domain:
                 print(word)
     s = fields.Schema(name=fields.TEXT, value=fields.TEXT)
     st = RamStorage()
     ix = st.create_index(s)
-    
+
     w = ix.writer()
     w.add_document(name=u("Yellow brown"), value=u("Blue red green purple?"))
     w.add_document(name=u("Alpha beta"), value=u("Gamma delta epsilon omega."))
     w.commit()
-    
+
     w = ix.writer()
     w.add_document(name=u("One two"), value=u("Three four five."))
     w.commit()
-    
+
     tr = ix.reader()
     assert_equal(ix.doc_count_all(), 3)
     assert_equal(list(tr.lexicon("name")), ["alpha", "beta", "brown", "one", "two", "yellow"])
         for length in lengths:
             w.add_document(f2=u(" ").join(islice(cycle(tokens), length)))
         w.commit()
-        
+
         with ix.reader() as dr:
             ls1 = [dr.doc_field_length(i, "f1") for i in xrange(0, len(lengths))]
             assert_equal(ls1, [0] * len(lengths))
         length = (i + 1) ** 6
         w.add_document(text=" ".join(word for _ in xrange(length)))
     w.commit()
-    
+
     s = ix.searcher()
     for i, word in enumerate(domain):
         target = byte_to_length(length_to_byte((i + 1) ** 6))
     w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
     w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
     w.commit()
-    
+
     dr = ix.reader()
     assert_equal(dr.stored_fields(0)["f1"], "A B C D E")
     assert_equal(dr.doc_field_length(0, "f1"), 5)
     assert_equal(dr.doc_field_length(0, "f2"), 3)
     assert_equal(dr.doc_field_length(1, "f2"), 4)
     assert_equal(dr.doc_field_length(2, "f2"), 7)
-    
+
     assert_equal(dr.field_length("f1"), 16)
     assert_equal(dr.field_length("f2"), 14)
     assert_equal(dr.max_field_length("f1"), 8)
     assert_equal(dr.max_field_length("f2"), 7)
-    
+
 def test_merged_lengths():
     s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                       f2=fields.KEYWORD(stored=True, scorable=True))
         w.add_document(f1=u("A B C"), f2=u("X"))
         w.add_document(f1=u("B C D E"), f2=u("Y Z"))
         w.commit()
-        
+
         w = ix.writer()
         w.add_document(f1=u("A"), f2=u("B C D E X Y"))
         w.add_document(f1=u("B C"), f2=u("X"))
         w.commit(NO_MERGE)
-        
+
         w = ix.writer()
         w.add_document(f1=u("A B X Y Z"), f2=u("B C"))
         w.add_document(f1=u("Y X"), f2=u("A B"))
         w.commit(NO_MERGE)
-        
+
         with ix.reader() as dr:
             assert_equal(dr.stored_fields(0)["f1"], u("A B C"))
             assert_equal(dr.doc_field_length(0, "f1"), 3)
             assert_equal(dr.doc_field_length(2, "f2"), 6)
             assert_equal(dr.doc_field_length(4, "f1"), 5)
-    
+
 def test_frequency_keyword():
     s = fields.Schema(content=fields.KEYWORD)
     st = RamStorage()
     ix = st.create_index(s)
-    
+
     w = ix.writer()
     w.add_document(content=u("A B C D E"))
     w.add_document(content=u("B B B B C D D"))
     w.add_document(content=u("D E F"))
     w.commit()
-    
+
     with ix.reader() as tr:
         assert_equal(tr.doc_frequency("content", u("B")), 2)
         assert_equal(tr.frequency("content", u("B")), 5)
         assert_equal(tr.frequency("content", u("F")), 1)
         assert_equal(tr.doc_frequency("content", u("Z")), 0)
         assert_equal(tr.frequency("content", u("Z")), 0)
-        
+
         stats = [(fname, text, ti.doc_frequency(), ti.weight())
                  for (fname, text), ti in tr]
-        
+
         assert_equal(stats, [("content", u("A"), 1, 1), ("content", u("B"), 2, 5),
                              ("content", u("C"), 2, 2), ("content", u("D"), 3, 4),
                              ("content", u("E"), 2, 2), ("content", u("F"), 1, 1)])
-    
+
 def test_frequency_text():
     s = fields.Schema(content=fields.KEYWORD)
     st = RamStorage()
     ix = st.create_index(s)
-    
+
     w = ix.writer()
     w.add_document(content=u("alfa bravo charlie delta echo"))
     w.add_document(content=u("bravo bravo bravo bravo charlie delta delta"))
     w.add_document(content=u("delta echo foxtrot"))
     w.commit()
-    
+
     with ix.reader() as tr:
         assert_equal(tr.doc_frequency("content", u("bravo")), 2)
         assert_equal(tr.frequency("content", u("bravo")), 5)
         assert_equal(tr.frequency("content", u("foxtrot")), 1)
         assert_equal(tr.doc_frequency("content", u("zulu")), 0)
         assert_equal(tr.frequency("content", u("zulu")), 0)
-        
+
         stats = [(fname, text, ti.doc_frequency(), ti.weight())
              for (fname, text), ti in tr]
-        
+
         assert_equal(stats, [("content", u("alfa"), 1, 1), ("content", u("bravo"), 2, 5),
                              ("content", u("charlie"), 2, 2), ("content", u("delta"), 3, 4),
                              ("content", u("echo"), 2, 2), ("content", u("foxtrot"), 1, 1)])
         w.add_document(key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega."))
         w.add_document(key=u("C"), name=u("One two"), value=u("Three four five."))
         w.commit()
-        
+
         w = ix.writer()
         count = w.delete_by_term("key", u("B"))
         assert_equal(count, 1)
         w.commit(merge=False)
-        
+
         assert_equal(ix.doc_count_all(), 3)
         assert_equal(ix.doc_count(), 2)
-        
+
         w = ix.writer()
         w.add_document(key=u("A"), name=u("Yellow brown"), value=u("Blue red green purple?"))
         w.add_document(key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega."))
         w.add_document(key=u("C"), name=u("One two"), value=u("Three four five."))
         w.commit()
-        
+
         # This will match both documents with key == B, one of which is already
         # deleted. This should not raise an error.
         w = ix.writer()
         count = w.delete_by_term("key", u("B"))
         assert_equal(count, 1)
         w.commit()
-        
+
         ix.optimize()
         assert_equal(ix.doc_count_all(), 4)
         assert_equal(ix.doc_count(), 4)
-        
+
         with ix.reader() as tr:
             assert_equal(list(tr.lexicon("name")), ["brown", "one", "two", "yellow"])
 
 def test_writer_reuse():
     s = fields.Schema(key=fields.ID)
     ix = RamStorage().create_index(s)
-    
+
     w = ix.writer()
     w.add_document(key=u("A"))
     w.add_document(key=u("B"))
     w.add_document(key=u("C"))
     w.commit()
-    
+
     # You can't re-use a committed/canceled writer
     assert_raises(IndexingError, w.add_document, key=u("D"))
     assert_raises(IndexingError, w.update_document, key=u("B"))
                    {"id": u("test2"), "path": u("/test/2"), "text": u("There")},
                    {"id": u("test3"), "path": u("/test/3"), "text": u("Reader")},
                    ]
-    
+
     schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                            path=fields.ID(unique=True, stored=True),
                            text=fields.TEXT)
-    
+
     with TempIndex(schema, "update") as ix:
         writer = ix.writer()
         for doc in SAMPLE_DOCS:
             writer.add_document(**doc)
         writer.commit()
-        
+
         writer = ix.writer()
         writer.update_document(id=u("test2"), path=u("test/1"), text=u("Replacement"))
         writer.commit()
-    
+
 def test_update2():
     schema = fields.Schema(key=fields.ID(unique=True, stored=True),
                            p=fields.ID(stored=True))
             w = ix.writer()
             w.update_document(key=text_type(n % 10), p=text_type(i))
             w.commit()
-            
+
         with ix.searcher() as s:
             results = [d["key"] for d in s.all_stored_fields()]
             results.sort()
         assert_equal(ix.doc_count_all(), 3)
         reindex()
         assert_equal(ix.doc_count_all(), 3)
-    
+
 def test_noscorables1():
     values = [u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"), u("foxtrot"),
               u("golf"), u("hotel"), u("india"), u("juliet"), u("kilo"), u("lima")]
     from random import choice, sample, randint
-    
+
     times = 1000
-    
+
     schema = fields.Schema(id=fields.ID, tags=fields.KEYWORD)
     with TempIndex(schema, "noscorables1") as ix:
         w = ix.writer()
         for _ in xrange(times):
             w.add_document(id=choice(values), tags=u(" ").join(sample(values, randint(2, 7))))
         w.commit()
-        
+
         with ix.searcher() as s:
             s.search(query.Term("id", "bravo"))
-        
+
 def test_noscorables2():
     schema = fields.Schema(field=fields.ID)
     with TempIndex(schema, "noscorables2") as ix:
         writer.add_document(id=u("2"), content=u("bravo charlie delta echo")) #deleted 1
         writer.add_document(id=u("3"), content=u("charlie delta echo foxtrot")) #deleted 2
         writer.commit()
-        
+
         writer = ix.writer()
         writer.delete_by_term("id", "1")
         writer.delete_by_term("id", "2")
         writer.add_document(id=u("6"), content=u("delta echo foxtrot golf")) #deleted 2
         writer.add_document(id=u("7"), content=u("echo foxtrot golf hotel")) # no d
         writer.commit(merge=False)
-        
+
         writer = ix.writer()
         writer.delete_by_term("id", "3")
         writer.delete_by_term("id", "6")
         writer.commit(merge=False)
 
         assert_equal(ix.doc_count(), 6)
-    
+
         with ix.searcher() as s:
             r = s.search(query.Prefix("content", u("d")), optimize=False)
             assert_equal(sorted([d["id"] for d in r]), ["4", "5", "8", "9"])
-            
+
             r = s.search(query.Prefix("content", u("d")))
             assert_equal(sorted([d["id"] for d in r]), ["4", "5", "8", "9"])
-            
+
             r = s.search(query.Prefix("content", u("d")), limit=None)
             assert_equal(sorted([d["id"] for d in r]), ["4", "5", "8", "9"])
-    
+
 def test_deleteall():
     schema = fields.Schema(text=fields.TEXT)
     with TempIndex(schema, "deleteall") as ix:
                 w.commit()
                 w = ix.writer()
         w.commit()
-        
+
         # This is just a test, don't use this method to delete all docs IRL!
         doccount = ix.doc_count_all()
         w = ix.writer()
         for docnum in xrange(doccount):
             w.delete_document(docnum)
         w.commit()
-        
+
         with ix.searcher() as s:
             r = s.search(query.Or([query.Term("text", u("alfa")), query.Term("text", u("bravo"))]))
             assert_equal(len(r), 0)
-        
+
         ix.optimize()
         assert_equal(ix.doc_count_all(), 0)
-        
+
         with ix.reader() as r:
             assert_equal(list(r), [])
-            
+
 def test_single():
     schema = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
     with TempIndex(schema, "single") as ix:
         w = ix.writer()
         w.add_document(id=u("1"), text=u("alfa"))
         w.commit()
-        
+
         with ix.searcher() as s:
             assert ("text", u("alfa")) in s.reader()
             assert_equal(list(s.documents(id="1")), [{"id": "1"}])
             assert_equal(list(s.documents(text="alfa")), [{"id": "1"}])
             assert_equal(list(s.all_stored_fields()), [{"id": "1"}])
-    
+
 def test_indentical_fields():
     schema = fields.Schema(id=fields.STORED,
                            f1=fields.TEXT, f2=fields.TEXT, f3=fields.TEXT)
     with TempIndex(schema, "identifields") as ix:
         w = ix.writer()
-        w.add_document(id=1, f1=u("alfa"), f2=u("alfa"), f3 = u("alfa"))
+        w.add_document(id=1, f1=u("alfa"), f2=u("alfa"), f3=u("alfa"))
         w.commit()
-    
+
         with ix.searcher() as s:
             assert_equal(list(s.lexicon("f1")), ["alfa"])
             assert_equal(list(s.lexicon("f2")), ["alfa"])
             assert_equal(list(s.documents(f1="alfa")), [{"id": 1}])
             assert_equal(list(s.documents(f2="alfa")), [{"id": 1}])
             assert_equal(list(s.documents(f3="alfa")), [{"id": 1}])
-        
+
 def test_multivalue():
     schema = fields.Schema(id=fields.STORED, date=fields.DATETIME, num=fields.NUMERIC)
     ix = RamStorage().create_index(schema)
     w.add_document(id=2, date=[datetime(2002, 2, 2), datetime(2003, 3, 3)],
                    num=[1, 2, 3, 12])
     w.commit()
-    
+
     nfield = schema["num"]
     dfield = schema["date"]
     with ix.reader() as r:
     w.add_document(id=1, a=u("alfa"), b=u("bear"), _a_boost=5.0)
     w.add_document(id=2, a=u("alfa alfa alfa alfa"), _boost=0.5)
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(query.Term("a", "alfa"))
         assert_equal([hit["id"] for hit in r], [1, 0, 2])
-    
+
     w = ix.writer()
-    w.add_document(id=3, a=u("alfa"), b=u("bottle"), _a_boost=0.5)
+    w.add_document(id=3, a=u("alfa"), b=u("bottle"))
     w.add_document(id=4, b=u("bravo"), _b_boost=2.0)
     w.commit(merge=False)
 
     with ix.searcher() as s:
         r = s.search(query.Term("a", "alfa"))
-        assert_equal([hit["id"] for hit in r], [1, 0, 2, 3])
+        assert_equal([hit["id"] for hit in r], [1, 0, 3, 2])

tests/test_searching.py

 
 
 def make_index():
-    s = fields.Schema(key = fields.ID(stored = True),
-                      name = fields.TEXT,
-                      value = fields.TEXT)
+    s = fields.Schema(key=fields.ID(stored=True),
+                      name=fields.TEXT,
+                      value=fields.TEXT)
     st = RamStorage()
     ix = st.create_index(s)
-    
+
     w = ix.writer()
-    w.add_document(key = u("A"), name = u("Yellow brown"), value = u("Blue red green render purple?"))
-    w.add_document(key = u("B"), name = u("Alpha beta"), value = u("Gamma delta epsilon omega."))
-    w.add_document(key = u("C"), name = u("One two"), value = u("Three rendered four five."))
-    w.add_document(key = u("D"), name = u("Quick went"), value = u("Every red town."))
-    w.add_document(key = u("E"), name = u("Yellow uptown"), value = u("Interest rendering outer photo!"))
+    w.add_document(key=u("A"), name=u("Yellow brown"), value=u("Blue red green render purple?"))
+    w.add_document(key=u("B"), name=u("Alpha beta"), value=u("Gamma delta epsilon omega."))
+    w.add_document(key=u("C"), name=u("One two"), value=u("Three rendered four five."))
+    w.add_document(key=u("D"), name=u("Quick went"), value=u("Every red town."))
+    w.add_document(key=u("E"), name=u("Yellow uptown"), value=u("Interest rendering outer photo!"))
     w.commit()
-    
+
     return ix
 
 def _get_keys(stored_fields):
         assert_equal(target, _docs(q, s))
 
 def test_empty_index():
-    schema = fields.Schema(key = fields.ID(stored=True), value = fields.TEXT)
+    schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT)
     st = RamStorage()
     assert_raises(index.EmptyIndexError, st.open_index, schema=schema)
 
     _run_query(Term("name", u("yellow")), [u("A"), u("E")])
     _run_query(Term("value", u("zeta")), [])
     _run_query(Term("value", u("red")), [u("A"), u("D")])
-    
+
 def test_require():
     _run_query(Require(Term("value", u("red")), Term("name", u("yellow"))),
                     [u("A")])
-    
+
 def test_and():
     _run_query(And([Term("value", u("red")), Term("name", u("yellow"))]),
                     [u("A")])
     # Missing
     _run_query(And([Term("value", u("ochre")), Term("name", u("glonk"))]),
                     [])
-    
+
 def test_or():
     _run_query(Or([Term("value", u("red")), Term("name", u("yellow"))]),
                     [u("A"), u("D"), u("E")])
     writer.add_document(name=u("d"), value=u("delta echo golf hotel india"))
     writer.add_document(name=u("e"), value=u("echo golf hotel india juliet"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         p = qparser.QueryParser("value", None)
         results = s.search(p.parse("echo NOT golf"))
         assert_equal(sorted([d["name"] for d in results]), ["a", "b"])
-        
+
         results = s.search(p.parse("echo NOT bravo"))
         assert_equal(sorted([d["name"] for d in results]), ["c", "d", "e"])
-    
+
     ix.delete_by_term("value", u("bravo"))
-    
+
     with ix.searcher() as s:
         results = s.search(p.parse("echo NOT charlie"))
         assert_equal(sorted([d["name"] for d in results]), ["d", "e"])
     schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("A"), content=u("alfa bravo charlie delta echo"))
     w.add_document(id=u("B"), content=u("bravo charlie delta echo foxtrot"))
     w.add_document(id=u("D"), content=u("delta echo foxtrot golf hotel"))
     w.add_document(id=u("E"), content=u("echo foxtrot golf hotel india"))
     w.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("content", schema)
-        
+
         q = qp.parse(u("charlie [delta TO foxtrot]"))
         assert_equal(q.__class__, And)
         assert_equal(q[0].__class__, Term)
         assert_equal(q[1].endexcl, False)
         ids = sorted([d['id'] for d in s.search(q)])
         assert_equal(ids, [u('A'), u('B'), u('C')])
-        
+
         q = qp.parse(u("foxtrot {echo TO hotel]"))
         assert_equal(q.__class__, And)
         assert_equal(q[0].__class__, Term)
         assert_equal(q[1].endexcl, False)
         ids = sorted([d['id'] for d in s.search(q)])
         assert_equal(ids, [u('B'), u('C'), u('D'), u('E')])
-        
+
         q = qp.parse(u("{bravo TO delta}"))
         assert_equal(q.__class__, TermRange)
         assert_equal(q.start, "bravo")
         assert_equal(q.endexcl, True)
         ids = sorted([d['id'] for d in s.search(q)])
         assert_equal(ids, [u('A'), u('B'), u('C')])
-        
+
         # Shouldn't match anything
         q = qp.parse(u("[1 to 10]"))
         assert_equal(q.__class__, TermRange)
     for letter in u("abcdefg"):
         w.add_document(id=letter)
     w.commit()
-    
+
     with ix.searcher() as s:
         def check(startexcl, endexcl, string):
             q = TermRange("id", "b", "f", startexcl, endexcl)
             r = "".join(sorted(d['id'] for d in s.search(q)))
             assert_equal(r, string)
-            
+
         check(False, False, "bcdef")
         check(True, False, "cdef")
         check(True, True, "cde")
         check(False, True, "bcde")
-    
+
 def test_open_ranges():
     schema = fields.Schema(id=fields.ID(stored=True))
     st = RamStorage()
     for letter in u("abcdefg"):
         w.add_document(id=letter)
     w.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("id", schema)
         def check(qstring, result):
             q = qp.parse(qstring)
             r = "".join(sorted([d['id'] for d in s.search(q)]))
             assert_equal(r, result)
-            
+
         check(u("[b TO]"), "bcdefg")
         check(u("[TO e]"), "abcde")
         check(u("[b TO d]"), "bcd")
 
 def test_open_numeric_ranges():
     domain = range(0, 10000, 7)
-    
+
     schema = fields.Schema(num=fields.NUMERIC(stored=True))
     ix = RamStorage().create_index(schema)
     w = ix.writer()
     for i in domain:
         w.add_document(num=i)
     w.commit()
-    
+
     qp = qparser.QueryParser("num", schema)
     with ix.searcher() as s:
         q = qp.parse("[100 to]")
         r = [hit["num"] for hit in s.search(q, limit=None)]
         assert_equal(r, [n for n in domain if n >= 100])
-        
+
         q = qp.parse("[to 5000]")
         r = [hit["num"] for hit in s.search(q, limit=None)]
         assert_equal(r, [n for n in domain if n <= 5000])
 def test_open_date_ranges():
     basedate = datetime(2011, 1, 24, 6, 25, 0, 0)
     domain = [basedate + timedelta(days=n) for n in xrange(-20, 20)]
-    
+
     schema = fields.Schema(date=fields.DATETIME(stored=True))
     ix = RamStorage().create_index(schema)
     w = ix.writer()
     for d in domain:
         w.add_document(date=d)
     w.commit()
-    
+
     with ix.searcher() as s:
         # Without date parser
         qp = qparser.QueryParser("date", schema)
         assert len(r) > 0
         target = [d for d in domain if d >= datetime(2011, 1, 10, 6, 25)]
         assert_equal(r, target)
-        
+
         q = qp.parse("[to 2011-01-30]")
         r = [hit["date"] for hit in s.search(q, limit=None)]
         assert len(r) > 0
         target = [d for d in domain if d <= datetime(2011, 1, 30, 6, 25)]
         assert_equal(r, target)
-    
+
         # With date parser
         from whoosh.qparser.dateparse import DateParserPlugin
         qp.add_plugin(DateParserPlugin(basedate))
-        
+
         q = qp.parse("[10 jan 2011 to]")
         r = [hit["date"] for hit in s.search(q, limit=None)]
         assert len(r) > 0
         target = [d for d in domain if d >= datetime(2011, 1, 10, 6, 25)]
         assert_equal(r, target)
-        
+
         q = qp.parse("[to 30 jan 2011]")
         r = [hit["date"] for hit in s.search(q, limit=None)]
         assert len(r) > 0
         target = [d for d in domain if d <= datetime(2011, 1, 30, 6, 25)]
         assert_equal(r, target)
-        
+
 def test_negated_unlimited_ranges():
     # Whoosh should treat u("[to]") as if it was "*"
     schema = fields.Schema(id=fields.ID(stored=True), num=fields.NUMERIC,
     w = ix.writer()
     from string import ascii_letters
     domain = text_type(ascii_letters)
-    
+
     dt = datetime.now()
     for i, letter in enumerate(domain):
         w.add_document(id=letter, num=i, date=dt + timedelta(days=i))
     w.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("id", schema)
-        
+
         nq = qp.parse(u("NOT [to]"))
         assert_equal(nq.__class__, Not)
         q = nq.query
         assert_equal(q.__class__, Every)
         assert_equal("".join(h["id"] for h in s.search(q, limit=None)), domain)
         assert_equal(list(nq.docs(s)), [])
-        
+
         nq = qp.parse(u("NOT num:[to]"))
         assert_equal(nq.__class__, Not)
         q = nq.query
         assert_equal(q.end, None)
         assert_equal("".join(h["id"] for h in s.search(q, limit=None)), domain)
         assert_equal(list(nq.docs(s)), [])
-        
+
         nq = qp.parse(u("NOT date:[to]"))
         assert_equal(nq.__class__, Not)
         q = nq.query
     schema = fields.Schema(a=fields.ID(stored=True), b=fields.KEYWORD)
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(a=u("First"), b=u("ccc ddd"))
     w.add_document(a=u("Second"), b=u("aaa ddd"))
     w.add_document(a=u("Third"), b=u("ccc eee"))
     w.commit()
-    
+
     qp = qparser.QueryParser("b", schema)
     with ix.searcher() as s:
         qr = qp.parse(u("b:ccc OR b:eee"))
     w.add_document(id=u("charlie"), content=u("charlie"))
     w.add_document(id=u("delta"), content=u("delta"))
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(Term("content", u("bravo")))
         assert_equal(len(r), 1)
         assert_equal(r[0]["id"], "bravo")
-    
+
     w = ix.writer()
     w.add_document(id=u("echo"), content=u("echo"))
     w.commit()
     assert_equal(len(ix._segments()), 1)
-    
+
     with ix.searcher() as s:
         r = s.search(Term("content", u("bravo")))
         assert_equal(len(r), 1)
         assert_equal(r[0]["id"], "bravo")
-    
+
 def test_multireader():
     sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
     st = RamStorage()
     w.add_document(id=u("hotel"), content=u("hotel"))
     w.add_document(id=u("india"), content=u("india"))
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(Term("content", u("bravo")))
         assert_equal(len(r), 1)
         assert_equal(r[0]["id"], "bravo")
-    
+
     w = ix.writer()
     w.add_document(id=u("juliet"), content=u("juliet"))
     w.add_document(id=u("kilo"), content=u("kilo"))
     w.add_document(id=u("romeo"), content=u("romeo"))
     w.commit()
     assert_equal(len(ix._segments()), 2)
-    
+
     #r = ix.reader()
     #assert r.__class__.__name__, "MultiReader")
     #pr = r.postings("content", u("bravo"))
-    
+
     with ix.searcher() as s:
         r = s.search(Term("content", u("bravo")))
         assert_equal(len(r), 1)
     writer.add_document(name=u("D"), value=u("Gibberish blonk falunk miss muffet sat tuffet garbonzo"))
     writer.add_document(name=u("E"), value=u("Blah blah blah pancakes"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         def names(results):
             return sorted([fields['name'] for fields in results])
-        
+
         q = Phrase("value", [u("little"), u("miss"), u("muffet"), u("sat"), u("tuffet")])
         m = q.matcher(s)
         assert_equal(m.__class__.__name__, "SpanNearMatcher")
-        
+
         r = s.search(q)
         assert_equal(names(r), ["A"])
         assert_equal(len(r), 1)
-        
+
         q = Phrase("value", [u("miss"), u("muffet"), u("sat"), u("tuffet")])
         assert_equal(names(s.search(q)), ["A", "D"])
-        
+
         q = Phrase("value", [u("falunk"), u("gibberish")])
         r = s.search(q)
         assert_equal(names(r), [])
         assert_equal(len(r), 0)
-        
+
         q = Phrase("value", [u("gibberish"), u("falunk")], slop=2)
         assert_equal(names(s.search(q)), ["D"])
-        
+
         q = Phrase("value", [u("blah")] * 4)
         assert_equal(names(s.search(q)), [])  # blah blah blah blah
-        
+
         q = Phrase("value", [u("blah")] * 3)
         m = q.matcher(s)
         assert_equal(names(s.search(q)), ["E"])
 #        
 #        q = Phrase("value", [u("blah")] * 3)
 #        assert names(searcher.search(q)), ["E"])
-    
+
 def test_phrase_score():
     schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
     storage = RamStorage()
     writer.add_document(name=u("E"), value=u("Blah blah blah pancakes"))
     writer.add_document(name=u("F"), value=u("Little miss muffet little miss muffet"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         q = Phrase("value", [u("little"), u("miss"), u("muffet")])
         m = q.matcher(s)
     writer.add_document(title=u("Richard of York"))
     writer.add_document(title=u("Lily the Pink"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("title", schema)
         q = qp.parse(u("richard of york"))
     schema = fields.Schema(text=tfield)
     storage = RamStorage()
     ix = storage.create_index(schema)
-    
+
     writer = ix.writer()
     for ls in permutations(["ape", "bay", "can", "day"], 4):
         writer.add_document(text=u(" ").join(ls))
     writer.commit()
-    
+
     with ix.searcher() as s:
         def result(q):
             r = s.search(q, limit=None, sortedby=None)
             return sorted([d['text'] for d in r])
-        
+
         q = Phrase("text", ["bay", "can", "day"])
         assert_equal(result(q), [u('ape bay can day'), u('bay can day ape')])
-    
+
 def test_phrase_sameword():
     schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
     storage = RamStorage()
     ix = storage.create_index(schema)
-    
+
     writer = ix.writer()
     writer.add_document(id=1, text=u("The film Linda Linda Linda is good"))
     writer.add_document(id=2, text=u("The model Linda Evangelista is pretty"))
     writer.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(Phrase("text", ["linda", "linda", "linda"]), limit=None)
         assert_equal(len(r), 1)
 def test_phrase_multi():
     schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
     ix = RamStorage().create_index(schema)
-    
+
     domain = u("alfa bravo charlie delta echo").split()
     w = None
     for i, ls in enumerate(permutations(domain)):
             w = None
     if w is not None:
         w.commit()
-    
+
     with ix.searcher() as s:
         q = Phrase("text", ["alfa", "bravo"])
         _ = s.search(q)
                            hobbies=fields.TEXT(stored=True))
     storage = RamStorage()
     ix = storage.create_index(schema)
-    writer = ix.writer() 
+    writer = ix.writer()
     writer.add_document(name=u('Frank'), hobbies=u('baseball, basketball'))
     writer.commit()
     r = ix.reader()
     assert_equal(r.field_length("hobbies"), 2)
     assert_equal(r.field_length("name"), 1)
     r.close()
-    
+
     writer = ix.writer()
-    writer.add_document(name=u('Jonny')) 
+    writer.add_document(name=u('Jonny'))
     writer.commit()
-    
+
     with ix.searcher() as s:
         r = s.reader()
         assert_equal(len(ix._segments()), 1)
         assert_equal(r.field_length("hobbies"), 2)
         assert_equal(r.field_length("name"), 2)
-        
+
         parser = qparser.MultifieldParser(['name', 'hobbies'], schema)
         q = parser.parse(u("baseball"))
         result = s.search(q)
         assert_equal(len(result), 1)
-    
+
 def test_search_fieldname_underscores():
     s = fields.Schema(my_name=fields.ID(stored=True), my_value=fields.TEXT)
     st = RamStorage()
     ix = st.create_index(s)
-    
+
     w = ix.writer()
     w.add_document(my_name=u("Green"), my_value=u("It's not easy being green"))
     w.add_document(my_name=u("Red"), my_value=u("Hopping mad like a playground ball"))
     w.commit()
-    
+
     qp = qparser.QueryParser("my_value", schema=s)
     with ix.searcher() as s:
         r = s.search(qp.parse(u("my_name:Green")))
         assert_equal(r[0]['my_name'], "Green")
-    
+
 def test_short_prefix():
     s = fields.Schema(name=fields.ID, value=fields.TEXT)
     qp = qparser.QueryParser("value", schema=s)
     q = qp.parse(u("s*"))
     assert_equal(q.__class__.__name__, "Prefix")
     assert_equal(q.text, "s")
-    
+
 def test_weighting():
     from whoosh.scoring import Weighting, BaseScorer
-    
+
     schema = fields.Schema(id=fields.ID(stored=True),
                            n_comments=fields.STORED)
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), n_comments=5)
     w.add_document(id=u("2"), n_comments=12)
     w.add_document(id=u("3"), n_comments=2)
     w.add_document(id=u("4"), n_comments=7)
     w.commit()
-    
+
     # Fake Weighting implementation
     class CommentWeighting(Weighting):
         def scorer(self, searcher, fieldname, text, qf=1):
             return self.CommentScorer(searcher.stored_fields)
-        
+
         class CommentScorer(BaseScorer):
             def __init__(self, stored_fields):
                 self.stored_fields = stored_fields
-        
+
             def score(self, matcher):
                 ncomments = self.stored_fields(matcher.id()).get("n_comments", 0)
                 return ncomments
-    
+
     with ix.searcher(weighting=CommentWeighting()) as s:
         q = TermRange("id", u("1"), u("4"), constantscore=False)
-        
+
         r = s.search(q)
         ids = [fs["id"] for fs in r]
         assert_equal(ids, ["2", "4", "1", "3"])
     w = ix.writer()
     w.add_document(id=1, f1=u("alfa bravo charlie delta"),
                    f2=u("alfa alfa alfa"),
-                   f3 = u("alfa echo foxtrot hotel india"))
+                   f3=u("alfa echo foxtrot hotel india"))
     w.commit()
-    
+
     with ix.searcher(weighting=scoring.Frequency()) as s:
         assert_equal(list(s.documents(f1="alfa")), [{"id": 1}])
         assert_equal(list(s.documents(f2="alfa")), [{"id": 1}])
         assert_equal(list(s.documents(f3="alfa")), [{"id": 1}])
-        
+
         qs = [Term("f1", "alfa"), Term("f2", "alfa"), Term("f3", "alfa")]
         dm = DisjunctionMax(qs)
         r = s.search(dm)
     schema = fields.Schema(id=fields.ID(stored=True))
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("alfa"))
     w.add_document(id=u("bravo"))
     w.add_document(id=u("echo"))
     w.add_document(id=u("foxtrot"))
     w.commit()
-    
+
     w = ix.writer()
     w.delete_by_term("id", "bravo")
     w.delete_by_term("id", "delta")
     w.delete_by_term("id", "echo")
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(Every("id"))
         assert_equal(sorted([d['id'] for d in r]), ["alfa", "charlie", "foxtrot"])
-    
+
 def test_missing_wildcard():
     schema = fields.Schema(id=fields.ID(stored=True), f1=fields.TEXT, f2=fields.TEXT)
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), f1=u("alfa"), f2=u("apple"))
     w.add_document(id=u("2"), f1=u("bravo"))
     w.add_document(id=u("4"), f2=u("donut"))
     w.add_document(id=u("5"))
     w.commit()
-    
+
     with ix.searcher() as s:
         r = s.search(Every("id"))
         assert_equal(sorted([d['id'] for d in r]), ["1", "2", "3", "4", "5"])
-        
+
         r = s.search(Every("f1"))
         assert_equal(sorted([d['id'] for d in r]), ["1", "2", "3"])
-        
+
         r = s.search(Every("f2"))
         assert_equal(sorted([d['id'] for d in r]), ["1", "3", "4"])
 
 def test_finalweighting():
     from whoosh.scoring import Frequency
-    
+
     schema = fields.Schema(id=fields.ID(stored=True),
                            summary=fields.TEXT,
                            n_comments=fields.STORED)
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), summary=u("alfa bravo"), n_comments=5)
     w.add_document(id=u("2"), summary=u("alfa"), n_comments=12)
     w.add_document(id=u("3"), summary=u("bravo"), n_comments=2)
     w.add_document(id=u("4"), summary=u("bravo bravo"), n_comments=7)
     w.commit()
-    
+
     class CommentWeighting(Frequency):
         use_final = True
-        
+
         def final(self, searcher, docnum, score):
             ncomments = searcher.stored_fields(docnum).get("n_comments", 0)
             return ncomments
-    
+
     with ix.searcher(weighting=CommentWeighting()) as s:
         r = s.search(qparser.QueryParser("summary", None).parse("alfa OR bravo"))
         ids = [fs["id"] for fs in r]
         assert_equal(["2", "4", "1", "3"], ids)
-    
+
 def test_outofdate():
     schema = fields.Schema(id=fields.ID(stored=True))
     st = RamStorage()
     ix = st.create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"))
     w.add_document(id=u("2"))
     w.commit()
-    
+
     s = ix.searcher()
     assert s.up_to_date()
-    
+
     w = ix.writer()
     w.add_document(id=u("3"))
     w.add_document(id=u("4"))
-    
+
     assert s.up_to_date()
     w.commit()
     assert not s.up_to_date()
 def test_find_missing():
     schema = fields.Schema(id=fields.ID, text=fields.KEYWORD(stored=True))
     ix = RamStorage().create_index(schema)
-    
+
     w = ix.writer()
     w.add_document(id=u("1"), text=u("alfa"))
     w.add_document(id=u("2"), text=u("bravo"))
     w.add_document(id=u("6"), text=u("foxtrot"))
     w.add_document(text=u("golf"))
     w.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("text", schema)
         q = qp.parse(u("NOT id:*"))
         assert_equal(list(h["text"] for h in r), ["charlie", "echo", "golf"])
 
 def test_ngram_phrase():
-    schema = fields.Schema(text=fields.NGRAM(minsize=2, maxsize=2, phrase=True), path=fields.ID(stored=True)) 
+    schema = fields.Schema(text=fields.NGRAM(minsize=2, maxsize=2, phrase=True), path=fields.ID(stored=True))
     ix = RamStorage().create_index(schema)
     writer = ix.writer()
-    writer.add_document(text=u('\u9AD8\u6821\u307E\u3067\u306F\u6771\u4EAC\u3067\u3001\u5927\u5B66\u304B\u3089\u306F\u4EAC\u5927\u3067\u3059\u3002'), path=u('sample')) 
+    writer.add_document(text=u('\u9AD8\u6821\u307E\u3067\u306F\u6771\u4EAC\u3067\u3001\u5927\u5B66\u304B\u3089\u306F\u4EAC\u5927\u3067\u3059\u3002'), path=u('sample'))
     writer.commit()
-    
+
     with ix.searcher() as s:
         p = qparser.QueryParser("text", schema)
-        
+
         q = p.parse(u('\u6771\u4EAC\u5927\u5B66'))
         assert_equal(len(s.search(q)), 1)
-        
+
         q = p.parse(u('"\u6771\u4EAC\u5927\u5B66"'))
         assert_equal(len(s.search(q)), 0)
-        
+
         q = p.parse(u('"\u306F\u6771\u4EAC\u3067"'))
         assert_equal(len(s.search(q)), 1)
-    
+
 def test_ordered():
     domain = u("alfa bravo charlie delta echo foxtrot").split(" ")
-    
+
     schema = fields.Schema(f=fields.TEXT(stored=True))
     ix = RamStorage().create_index(schema)
     writer = ix.writer()
     for ls in permutations(domain):
         writer.add_document(f=u(" ").join(ls))
     writer.commit()
-    
+
     with ix.searcher() as s:
         q = Ordered([Term("f", u("alfa")), Term("f", u("charlie")), Term("f", "echo")])
         r = s.search(q)
     w.add_document(id=3, f=u("bravo four five"))
     w.add_document(id=4, f=u("bravo six seven"))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = Otherwise(Term("f", u("alfa")), Term("f", u("six")))
         assert_equal([d["id"] for d in s.search(q)], [1, 2])
-        
+
         q = Otherwise(Term("f", u("tango")), Term("f", u("four")))
         assert_equal([d["id"] for d in s.search(q)], [2, 3])
-        
+
         q = Otherwise(Term("f", u("tango")), Term("f", u("nine")))
         assert_equal([d["id"] for d in s.search(q)], [])
 
     w.add_document(id=3, f=u("charlie delta echo foxtrot"))
     w.add_document(id=4, f=u("delta echo foxtrot golf"))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = FuzzyTerm("f", "brave")
         assert_equal([d["id"] for d in s.search(q)], [1, 2])
-        
+
 def test_fuzzyterm2():
     schema = fields.Schema(id=fields.STORED, f=fields.TEXT(spelling=True))
     ix = RamStorage().create_index(schema)
     w.add_document(id=3, f=u("charlie delta echo foxtrot"))
     w.add_document(id=4, f=u("delta echo foxtrot golf"))
     w.commit()
-    
+
     with ix.searcher() as s:
         assert_equal(list(s.reader().terms_within("f", u("brave"), 1)), ["bravo"])
         q = FuzzyTerm("f", "brave")
         assert_equal([d["id"] for d in s.search(q)], [1, 2])
-    
+
 def test_multireader_not():
     schema = fields.Schema(id=fields.STORED, f=fields.TEXT)
-    
+
     ix = RamStorage().create_index(schema)
     w = ix.writer()
     w.add_document(id=0, f=u("alfa bravo chralie"))
     w.add_document(id=3, f=u("delta echo foxtrot"))
     w.add_document(id=4, f=u("echo foxtrot golf"))
     w.commit()
-    
+
     with ix.searcher() as s:
         q = And([Term("f", "delta"), Not(Term("f", "delta"))])
         r = s.search(q)
         assert_equal(len(r), 0)
-    
+
     ix = RamStorage().create_index(schema)
     w = ix.writer()
     w.add_document(id=5, f=u("alfa bravo chralie"))
     w.add_document(id=10, f=u("foxtrot golf delta"))
     w.commit(merge=False)
     assert len(ix._segments()) > 1
-    
+
     with ix.searcher() as s:
         q = And([Term("f", "delta"), Not(Term("f", "delta"))])
         r = s.search(q)
         t = u(" ").join(ls)
         w.add_document(title=t, text=t)
     w.commit()
-    
+
     q = Or([Term("title", u("alfa")), Term("title", u("bravo")), Phrase("text", [u("bravo"), u("charlie"), u("delta")])])
-    
+
     def boost_phrases(q):
         if isinstance(q, Phrase):
             q.boost *= 1000.0
         else:
             return q.apply(boost_phrases)
     q = boost_phrases(q)
-    
+
     with ix.searcher() as s:
         r = s.search(q, limit=None)
         for hit in r:
     w.add_document(id=8, path=u("/b/3"), text=u("charlie delta echo"))
     w.add_document(id=9, path=u("/c/3"), text=u("delta echo alfa"))
     w.commit(merge=False)
-    
+
     with ix.searcher() as s:
         fq = Or([Prefix("path", "/a"), Prefix("path", "/b")])
         r = s.search(Term("text", "alfa"), filter=fq)
         assert_equal([d["id"] for d in r], [1, 4, 5])
-        
+
         r = s.search(Term("text", "bravo"), filter=fq)
-        assert_equal([d["id"] for d in r], [1, 2, 5, 7,])
-    
+        assert_equal([d["id"] for d in r], [1, 2, 5, 7, ])
+
 def test_timelimit():
     schema = fields.Schema(text=fields.TEXT)
     ix = RamStorage().create_index(schema)
     for _ in xrange(50):
         w.add_document(text=u("alfa"))
     w.commit()
-    
+
     import time
     from whoosh import matching
-    
+
     class SlowMatcher(matching.WrappingMatcher):
         def next(self):
             time.sleep(0.02)
             self.child.next()
-    
+
     class SlowQuery(WrappingQuery):
         def matcher(self, searcher):
             return SlowMatcher(self.child.matcher(searcher))
-    
+
     with ix.searcher() as s:
         oq = Term("text", u("alfa"))
         sq = SlowQuery(oq)
-        
+
         col = searching.Collector(timelimit=0.1, limit=None)
         assert_raises(searching.TimeLimit, col.search, s, sq)
-        
+
         col = searching.Collector(timelimit=0.1, limit=40)
         assert_raises(searching.TimeLimit, col.search, s, sq)
-        
+
         col = searching.Collector(timelimit=0.25, limit=None)
         try:
             col.search(s, sq)
         except searching.TimeLimit:
             r = col.results()
             assert r.scored_length() > 0
-        
+
         col = searching.Collector(timelimit=0.5, limit=None)
         r = col.search(s, oq)
         assert r.runtime < 0.5
-            
+
 def test_fieldboost():
     schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
     ix = RamStorage().create_index(schema)
     w.add_document(id=5, a=u("alfa alfa echo"), b=u("tango tango tango"))
     w.add_document(id=6, a=u("alfa bravo echo"), b=u("alfa alfa tango"))
     w.commit()
-    
+
     def field_booster(fieldname, factor=2.0):
         "Returns a function which will boost the given field in a query tree"
         def booster_fn(obj):
             else:
                 return obj
         return booster_fn
-    
+
     with ix.searcher() as s:
         q = Or([Term("a", u("alfa")), Term("b", u("alfa"))])
         q = q.accept(field_booster("a", 100.0))
         assert_equal(text_type(q), text_type("(a:alfa^100.0 OR b:alfa)"))
         r = s.search(q)
         assert_equal([hit["id"] for hit in r], [2, 5, 6, 3, 0, 1, 4])
-    
+
 def test_andmaybe_quality():
     schema = fields.Schema(id=fields.STORED, title=fields.TEXT(stored=True),
                            year=fields.NUMERIC)
     ix = RamStorage().create_index(schema)
-    
+
     domain = [(u('Alpha Bravo Charlie Delta'), 2000),
               (u('Echo Bravo Foxtrot'), 2000), (u('Bravo Golf Hotel'), 2002),
               (u('Bravo India'), 2002), (u('Juliet Kilo Bravo'), 2004),
     for title, year in domain:
         w.add_document(title=title, year=year)
     w.commit()
-    
+
     with ix.searcher() as s:
         qp = qparser.QueryParser("title", ix.schema)
         q = qp.parse(u("title:bravo ANDMAYBE year:2004"))
-        
+
         titles = [hit["title"] for hit in s.search(q, limit=None)[:2]]
         print("titles1=", titles)
         assert "Juliet Kilo Bravo" in titles
-        
+
         titles = [hit["title"] for hit in s.search(q, limit=2)]
         print("titles2=", titles)
         assert "Juliet Kilo Bravo" in titles
     w.add_document(id="d", text=u("delta echo foxtrot golf hotel"))
     w.add_document(id="e", text=u("echo foxtrot golf hotel india"))
     w.commit()
-        
+
     with ix.searcher() as s:
         r = s.search(query.Term("text", u("golf")), limit=10)
         assert_equal(len(r), 3)
         for _ in r:
             count += 1
         assert_equal(count, 5)
+
+def test_scorer():
+    # Regression test for issue #183: scorers must be built against each
+    # sub-searcher (segment) rather than the top-level searcher, so that
+    # per-segment statistics and segment-relative doc numbers are used.
+    schema = fields.Schema(key=fields.TEXT(stored=True))
+    ix = RamStorage().create_index(schema)
+    # First segment: three docs containing 3, 4, and 2 occurrences of "alfa".
+    w = ix.writer()
+    w.add_document(key=u("alfa alfa alfa"))
+    w.add_document(key=u("alfa alfa alfa alfa"))
+    w.add_document(key=u("alfa alfa"))
+    w.commit()
+    # Second segment: commit with merge=False keeps it separate from the
+    # first, giving the index two segments (and thus two sub-searchers).
+    w = ix.writer()
+    w.add_document(key=u("alfa alfa alfa alfa alfa alfa"))
+    w.add_document(key=u("alfa"))
+    w.add_document(key=u("alfa alfa alfa alfa alfa"))
+    w.commit(merge=False)
+
+    dw = scoring.DebugModel()
+    s = ix.searcher(weighting=dw)
+    r = s.search(query.Term("key", "alfa"))
+    log = dw.log
+    # Each log entry appears to be (fieldname, text, docnum, score, length)
+    # — presumably recorded by DebugModel per scored posting; verify against
+    # scoring.DebugModel. The docnums restart at 0 for the second segment's
+    # three entries, proving scoring happened per sub-searcher, and each
+    # score equals the term frequency of "alfa" in that document.
+    assert_equal(log, [('key', 'alfa', 0, 3.0, 3), ('key', 'alfa', 1, 4.0, 4),
+                       ('key', 'alfa', 2, 2.0, 2), ('key', 'alfa', 0, 6.0, 6),
+                       ('key', 'alfa', 1, 1.0, 1), ('key', 'alfa', 2, 5.0, 5)])
+
+