Commits

Matt Chaput  committed 4746a1c

Added IndexWriter.group/start_group/end_group, fixed up NestedDocument query.
Scoring system and Query.matcher() now allow more scoring flexibility.

  • Participants
  • Parent commits 0e919ee
  • Branches nested

Comments (0)

Files changed (7)

File src/whoosh/filedb/multiproc.py

         self.jobqueue = Queue(self.procs * 4)
         self.resultqueue = Queue()
         self.docbuffer = []
+        self._grouping = 0
 
         self.writelock = ix.lock("WRITELOCK")
         self.writelock.acquire()
         finally:
             self.writelock.release()
 
+    def start_group(self):
+        self._grouping += 1
+
+    def end_group(self):
+        if not self._grouping:
+            raise Exception("Unbalanced end_group")
+        self._grouping -= 1
+
     def add_document(self, **fields):
         self.docbuffer.append(fields)
-        if len(self.docbuffer) >= self.bufferlimit:
-            self._enqueue()
-
-    def add_document_group(self, docs):
-        # Add the documents to the doc buffer all at once
-        self.docbuffer.extend(docs)
-        # THEN check if the buffer is too big
-        if len(self.docbuffer) >= self.bufferlimit:
+        if not self._grouping and len(self.docbuffer) >= self.bufferlimit:
             self._enqueue()
 
     def commit(self, **kwargs):
         self.tasks = []
         self.buffer = []
         self.bufferlimit = batchsize
+        self._grouping = 0
 
     def _new_task(self, firstjob):
         task = PoolWritingTask(self.schema, self.dir, self.jobqueue,
 
     def _append(self, item):
         self.buffer.append(item)
-        if len(self.buffer) > self.bufferlimit:
+        if not self._grouping and len(self.buffer) > self.bufferlimit:
             self._enqueue()
 
+    def start_group(self):
+        self._grouping += 1
+
+    def end_group(self):
+        if not self._grouping:
+            raise Exception("Unbalanced end_group")
+        self._grouping -= 1
+
     def add_content(self, *args):
         self._append((0, args))
 

File src/whoosh/query.py

 import re
 from array import array
 
+from whoosh import matching
 from whoosh.analysis import Token
 from whoosh.compat import u, text_type
 from whoosh.lang.morph_en import variations
-from whoosh.matching import (AndMaybeMatcher, DisjunctionMaxMatcher,
-                             ListMatcher, IntersectionMatcher, InverseMatcher,
-                             NullMatcher, RequireMatcher, UnionMatcher,
-                             WrappingMatcher, AndNotMatcher, NullMatcherClass,
-                             Matcher)
 from whoosh.reading import TermNotFound
+from whoosh.support.bitvector import BitSet, SortedIntSet
 from whoosh.support.times import datetime_to_long
 from whoosh.util import make_binary_tree, make_weighted_tree, methodcaller
 
 
         return self.estimate_size(ixreader)
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         """Returns a :class:`~whoosh.matching.Matcher` object you can use to
         retrieve documents and scores matching this query.
         
         :rtype: :class:`whoosh.matching.Matcher`
         """
+
         raise NotImplementedError
 
     def docs(self, searcher):
     def estimate_min_size(self, ixreader):
         return self.child.estimate_min_size(ixreader)
 
-    def matcher(self, searcher):
-        return self.child.matcher(searcher)
+    def matcher(self, searcher, weighting=None):
+        return self.child.matcher(searcher, weighting=weighting)
 
 
 class CompoundQuery(Query):
         else:
             return NullQuery
 
-    def _matcher(self, matchercls, q_weight_fn, searcher, **kwargs):
+    def _matcher(self, matchercls, q_weight_fn, searcher, weighting=None,
+                 **kwargs):
         # q_weight_fn is a function which is called on each query and returns a
         # "weight" value which is used to build a huffman-like matcher tree. If
         # q_weight_fn is None, an order-preserving binary tree is used instead.
         subs, nots = self._split_queries()
 
         if not subs:
-            return NullMatcher()
+            return matching.NullMatcher()
 
         # Create a matcher from the list of subqueries
+        subms = [q.matcher(searcher, weighting=weighting) for q in subs]
         if len(subs) == 1:
-            m = subs[0].matcher(searcher)
+            m = subms[0]
         elif q_weight_fn is None:
-            subms = [q.matcher(searcher) for q in subs]
             m = make_binary_tree(matchercls, subms)
         else:
-            subms = [(q_weight_fn(q), q.matcher(searcher)) for q in subs]
-            m = make_weighted_tree(matchercls, subms)
+            w_subms = [(q_weight_fn(q), m) for q, m in zip(subs, subms)]
+            m = make_weighted_tree(matchercls, w_subms)
 
         # If there were queries inside Not(), make a matcher for them and
         # wrap the matchers in an AndNotMatcher
                 r = searcher.reader()
                 notms = [(q.estimate_size(r), q.matcher(searcher))
                          for q in nots]
-                notm = make_weighted_tree(UnionMatcher, notms)
+                notm = make_weighted_tree(matching.UnionMatcher, notms)
 
             if notm.is_active():
-                m = AndNotMatcher(m, notm)
+                m = matching.AndNotMatcher(m, notm)
 
         # If this query had a boost, add a wrapping matcher to apply the boost
         if self.boost != 1.0:
-            m = WrappingMatcher(m, self.boost)
+            m = matching.WrappingMatcher(m, self.boost)
 
         return m
 
                 termset.add(term)
         return termset
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         fieldname = self.fieldname
         reader = searcher.reader()
         qs = [Term(fieldname, word) for word in self._words(reader)]
         if not qs:
-            return NullMatcher()
+            return matching.NullMatcher()
 
         if len(qs) == 1:
             # If there's only one term, just use it
             docset = set()
             for q in qs:
                 docset.update(q.matcher(searcher).all_ids())
-            return ListMatcher(sorted(docset), all_weights=self.boost)
+            return matching.ListMatcher(sorted(docset), all_weights=self.boost)
 
         else:
             # The default case: Or the terms together
             q = Or(qs)
 
-        return q.matcher(searcher)
+        return q.matcher(searcher, weighting=weighting)
 
 
 # Concrete classes
     def estimate_size(self, ixreader):
         return ixreader.doc_frequency(self.fieldname, self.text)
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         if (self.fieldname, self.text) in searcher.reader():
-            m = searcher.postings(self.fieldname, self.text)
+            m = searcher.postings(self.fieldname, self.text,
+                                  weighting=weighting)
             if self.boost != 1.0:
-                m = WrappingMatcher(m, boost=self.boost)
+                m = matching.WrappingMatcher(m, boost=self.boost)
             return m
         else:
-            return NullMatcher()
+            return matching.NullMatcher()
 
 
 class And(CompoundQuery):
     def estimate_size(self, ixreader):
         return min(q.estimate_size(ixreader) for q in self.subqueries)
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         r = searcher.reader()
-        return self._matcher(IntersectionMatcher,
-                             lambda q: 0 - q.estimate_size(r), searcher)
+        return self._matcher(matching.IntersectionMatcher,
+                             lambda q: 0 - q.estimate_size(r), searcher,
+                             weighting=weighting)
 
 
 class Or(CompoundQuery):
     # This is used by the superclass's __unicode__ method.
     JOINT = " OR "
     intersect_merge = False
-    matcher_class = UnionMatcher
+    matcher_class = matching.UnionMatcher
 
     def __init__(self, subqueries, boost=1.0, minmatch=0):
         CompoundQuery.__init__(self, subqueries, boost=boost)
         else:
             return set()
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         r = searcher.reader()
         return self._matcher(self.matcher_class, lambda q: q.estimate_size(r),
-                             searcher)
+                             searcher, weighting=weighting)
 
 
 class DisjunctionMax(CompoundQuery):
         else:
             return set()
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         r = searcher.reader()
-        return self._matcher(DisjunctionMaxMatcher,
+        return self._matcher(matching.DisjunctionMaxMatcher,
                              lambda q: q.estimate_size(r), searcher,
-                             tiebreak=self.tiebreak)
+                             weighting=weighting, tiebreak=self.tiebreak)
 
 
 class Not(Query):
     def estimate_min_size(self, ixreader):
         return 1 if ixreader.doc_count() else 0
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         # Usually only called if Not is the root query. Otherwise, queries such
         # as And and Or do special handling of Not subqueries.
         reader = searcher.reader()
         child = self.query.matcher(searcher)
-        return InverseMatcher(child, searcher.doc_count_all(),
-                              missing=reader.is_deleted)
+        return matching.InverseMatcher(child, searcher.doc_count_all(),
+                                       missing=reader.is_deleted)
 
 
 class PatternQuery(MultiTerm):
             q = ConstantScoreQuery(q, self.boost)
         return q
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         q = self._compile_query(searcher.reader())
-        return q.matcher(searcher)
+        return q.matcher(searcher, weighting=weighting)
 
 
 class DateRange(NumericRange):
     def estimate_min_size(self, ixreader):
         return self._and_query().estimate_min_size(ixreader)
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         fieldname = self.fieldname
         reader = searcher.reader()
 
         # Shortcut the query if one of the words doesn't exist.
         for word in self.words:
             if (fieldname, word) not in reader:
-                return NullMatcher()
+                return matching.NullMatcher()
 
         field = searcher.schema[fieldname]
         if not field.format or not field.format.supports("positions"):
         # phrase and return its matcher
         from whoosh.spans import SpanNear
         q = SpanNear.phrase(fieldname, self.words, slop=self.slop)
-        m = q.matcher(searcher)
+        m = q.matcher(searcher, weighting=weighting)
         if self.boost != 1.0:
-            m = WrappingMatcher(m, boost=self.boost)
+            m = matching.WrappingMatcher(m, boost=self.boost)
         return m
 
 
 
     JOINT = " BEFORE "
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         from whoosh.spans import SpanBefore
 
-        return self._matcher(SpanBefore._Matcher, None, searcher)
+        return self._matcher(SpanBefore._Matcher, None, searcher,
+                             weighting=weighting)
 
 
 class Every(Query):
     def estimate_size(self, ixreader):
         return ixreader.doc_count()
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         fieldname = self.fieldname
         reader = searcher.reader()
 
                 doclist.update(pr.all_ids())
             doclist = sorted(doclist)
 
-        return ListMatcher(doclist, all_weights=self.boost)
+        return matching.ListMatcher(doclist, all_weights=self.boost)
 
 
 class _NullQuery(Query):
     def docs(self, searcher):
         return []
 
-    def matcher(self, searcher):
-        return NullMatcher()
+    def matcher(self, searcher, weighting=None):
+        return matching.NullMatcher()
 
 NullQuery = _NullQuery()
 
     """
 
     def __init__(self, child, score=1.0):
-        super(ConstantScoreQuery, self).__init__(child)
+        WrappingQuery.__init__(self, child)
         self.score = score
 
     def __eq__(self, other):
     def _rewrap(self, child):
         return self.__class__(child, self.score)
 
-    def matcher(self, searcher):
+    def matcher(self, searcher, weighting=None):
         m = self.child.matcher(searcher)
-        if isinstance(m, NullMatcherClass):
+        if isinstance(m, matching.NullMatcherClass):
             return m
         else:
             ids = array("I", m.all_ids())
-            return ListMatcher(ids, all_weights=self.score, term=m.term())
+            return matching.ListMatcher(ids, all_weights=self.score,
+                                        term=m.term())
+
+
+class WeightingQuery(WrappingQuery):
+    """Wraps a query and uses a specific :class:`whoosh.scoring.WeightingModel`
+    to score documents that match the wrapped query.
+    """
+
+    def __init__(self, child, weighting):
+        WrappingQuery.__init__(self, child)
+        self.weighting = weighting
+
+    def matcher(self, searcher, weighting=None):
+        return self.child.matcher(searcher, self.weighting)
 
 
 class BinaryQuery(CompoundQuery):
 
         return self.__class__(a, b)
 
-    def matcher(self, searcher):
-        return self.matcherclass(self.a.matcher(searcher),
-                                 self.b.matcher(searcher))
+    def matcher(self, searcher, weighting=None):
+        return self.matcherclass(self.a.matcher(searcher, weighting=weighting),
+                                 self.b.matcher(searcher, weighting=weighting))
 
 
 class Require(BinaryQuery):
     """
 
     JOINT = " REQUIRE "
-    matcherclass = RequireMatcher
+    matcherclass = matching.RequireMatcher
 
     def requires(self):
         return self.a.requires() | self.b.requires()
     """
 
     JOINT = " ANDMAYBE "
-    matcherclass = AndMaybeMatcher
+    matcherclass = matching.AndMaybeMatcher
 
     def normalize(self):
         a = self.a.normalize()
     """
 
     JOINT = " ANDNOT "
-    matcherclass = AndNotMatcher
+    matcherclass = matching.AndNotMatcher
 
     def with_boost(self, boost):
         return self.__class__(self.a.with_boost(boost), self.b)
 
     JOINT = " OTHERWISE "
 
-    def matcher(self, searcher):
-        m = self.a.matcher(searcher)
+    def matcher(self, searcher, weighting=None):
+        m = self.a.matcher(searcher, weighting=weighting)
         if not m.is_active():
-            m = self.b.matcher(searcher)
+            m = self.b.matcher(searcher, weighting=weighting)
         return m
 
 
 class NestedDocument(WrappingQuery):
-    def __init__(self, parents, q, per_parent_limit=None, score_fn=sum):
-        self.parents = parents
+    def __init__(self, parentq, q, per_parent_limit=None, score_fn=sum):
+        self.parentq = parentq
         self.child = q
         self.per_parent_limit = per_parent_limit
         self.score_fn = score_fn
 
     def normalize(self):
-        p = self.parents.normalize()
-        q = self.q.normalize()
+        p = self.parentq.normalize()
+        q = self.child.normalize()
 
         if p is NullQuery or q is NullQuery:
             return NullQuery
         return self.__class__(p, q)
 
     def requires(self):
-        return self.q.requires()
-
-    def matcher(self, searcher):
-        comb = searcher._filter_to_comb(self.parents)
-        m = self.child.matcher(searcher)
-        return self.NestedDocumentMatcher(comb, m, self.per_parent_limit,
-                                          self.score_fn)
-
-    class NestedDocumentMatcher(Matcher):
-        def __init__(self, comb, child, per_parent_limit, score_fn):
+        return self.child.requires()
+
+    def matcher(self, searcher, weighting=None):
+        bits = BitSet(searcher.doc_count_all(), self.parentq.docs(searcher))
+        m = self.child.matcher(searcher, weighting=weighting)
+        return self.NestedDocumentMatcher(bits, m, self.per_parent_limit)
+
+    class NestedDocumentMatcher(matching.Matcher):
+        def __init__(self, comb, child, per_parent_limit):
             self.comb = comb
             self.child = child
             self.per_parent_limit = per_parent_limit
-            self.score_fn = score_fn
-            self._gather()
+
+            self._nextdoc = None
+            if self.child.is_active():
+                self._gather()
 
         def is_active(self):
             return self._nextdoc is not None
         def _gather(self):
             child = self.child
             pplimit = self.per_parent_limit
-            scores = [child.score()]
-            self._nextdoc = parent = self._parent(child.id())
-
+            self._nextdoc = self._parent(child.id())
+
+            nextparent = self.comb.after(child.id())
             count = 1
-            while (child.is_active()
-                   and self._parent(child.id()) == parent):
+            score = 0
+            while (child.is_active() and
+                   (nextparent is None or child.id() < nextparent)):
                 if pplimit and count > pplimit:
-                    comb = self.comb
-                    docid = child.id()
-                    while docid not in comb:
-                        docid += 1
-                    child.skip_to(docid)
+                    child.skip_to(nextparent)
                     break
 
-                scores.append(child.score())
+                score += child.score()
                 child.next()
                 count += 1
 
-            self._nextscore = self.score_fn(scores)
+            self._nextscore = score
 
         def id(self):
             return self._nextdoc
             raise NotImplementedError(self.__class__)
 
 
-
 def BooleanQuery(required, should, prohibited):
     return AndNot(AndMaybe(And(required), Or(should)),
                   Or(prohibited)).normalize()

File src/whoosh/scoring.py

     a scorer instance.
     """
 
+    def doc_field_length(self, docnum):
+        return self.searcher.doc_field_length(docnum, self.fieldname)
+
     def supports_block_quality(self):
         """Returns True if this class supports quality optimizations.
         """
 
         raise NotImplementedError(self.__class__.__name__)
 
+    def score_weight_length(self, weight, length):
+        """Returns a score for a document with the given term weight and
+        field length.
+        """
+
+        raise NotImplementedError(self.__class__.__name__)
+
     def block_quality(self, matcher):
         """Returns the *maximum possible score* the matcher can give in its
         current "block" (whatever concept of "block" the backend might use). If
         raise NotImplementedError(self.__class__.__name__)
 
 
+class WrappingScorer(BaseScorer):
+    def __init__(self, child):
+        self.child = child
+
+    @property
+    def max_quality(self):
+        return self.child.max_quality
+
+    def doc_field_length(self, docnum):
+        return self.child.doc_field_length(docnum)
+
+    def supports_block_quality(self):
+        return self.child.supports_block_quality()
+
+    def score(self, matcher):
+        return self.child.score(matcher)
+
+    def score_weight_length(self, weight, length):
+        return self.child.score_weight_length(weight, length)
+
+    def block_quality(self, matcher):
+        return self.child.block_quality(matcher)
+
+
 # Scorer that just returns term weight
 
 class WeightScorer(BaseScorer):
     scorer for fields that aren't scorable (don't store field lengths).
     """
 
-    def __init__(self, maxweight):
-        self.max_quality = maxweight
+    def __init__(self, searcher, fieldname, text):
+        ti = searcher.term_info(fieldname, text)
+        self.max_quality = ti.max_weight()
+
+    def doc_field_length(self, docnum):
+        return 1
 
     def supports_block_quality(self):
         return True
     def score(self, matcher):
         return matcher.weight()
 
+    def score_weight_length(self, weight, length):
+        return weight
+
     def block_quality(self, matcher):
         return matcher.block_max_weight()
 
-    @classmethod
-    def for_(cls, searcher, fieldname, text):
-        ti = searcher.term_info(fieldname, text)
-        return cls(ti.max_weight())
 
+class TestingWeightScorer(WeightScorer):
+    # This is a variation of WeightScorer useful for testing
+    def __init__(self, maxweight):
+        self.max_quality = maxweight
 
-# Base scorer for models that only use weight and field length
+
+# Base scorer for models that use weight and field length
 
 class WeightLengthScorer(BaseScorer):
     """Base class for scorers where the only per-document variables are term
     weight and field length.
     
-    Subclasses should override the ``_score(weight, length)`` method to return
-    the score for a document with the given weight and length, and call the
-    ``setup()`` method at the end of the initializer to set up common
-    attributes.
+    Subclasses should override the ``score_weight_length(weight, length)``
+    method to return the score for a document with the given weight and length,
+    and call the ``setup()`` method at the end of the initializer to set up
+    common attributes.
     """
 
     def setup(self, searcher, fieldname, text):
         arguments. Any additional arguments given to this method are passed
         through to the initializer.
         
-        Note: this method calls ``self._score()``, so you should only call it
-        in the initializer after setting up whatever attributes ``_score()``
-        depends on::
+        Note: this method calls ``self.score_weight_length()``, so you should
+        only call it in the initializer after setting up whatever attributes
+        ``score_weight_length()`` depends on::
         
             class MyScorer(WeightLengthScorer):
                 def __init__(self, searcher, fieldname, text, parm=1.0):
                     self.parm = parm
                     self.setup(searcher, fieldname, text)
                 
-                def _score(self, weight, length):
+                def score_weight_length(self, weight, length):
                     return (weight / (length + 1)) * self.parm
         """
 
+        if not searcher.schema[fieldname].scorable:
+            return WeightScorer(searcher, fieldname, text)
+
+        self.searcher = searcher
+        self.fieldname = fieldname
         ti = searcher.term_info(fieldname, text)
-        if not searcher.schema[fieldname].scorable:
-            return WeightScorer(ti.max_weight())
-
-        self.dfl = lambda docid: searcher.doc_field_length(docid, fieldname, 1)
-        self.max_quality = self._score(ti.max_weight(), ti.min_length())
+        self.max_quality = self.score_weight_length(ti.max_weight(),
+                                                    ti.min_length())
 
     def supports_block_quality(self):
         return True
 
     def score(self, matcher):
-        return self._score(matcher.weight(), self.dfl(matcher.id()))
+        return self.score_weight_length(matcher.weight(),
+                                        self.doc_field_length(matcher.id()))
 
     def block_quality(self, matcher):
-        return self._score(matcher.block_max_weight(),
-                           matcher.block_min_length())
+        return self.score_weight_length(matcher.block_max_weight(),
+                                        matcher.block_min_length())
 
-    def _score(self, weight, length):
+    def score_weight_length(self, weight, length):
         # Override this method with the actual scoring function
         raise NotImplementedError(self.__class__.__name__)
 
 # Debugging model
 
 class DebugModel(WeightingModel):
-    def __init__(self):
+    def __init__(self, model):
         self.log = []
+        self.model = model
 
     def scorer(self, searcher, fieldname, text, qf=1):
-        return DebugScorer(searcher, fieldname, text, self.log)
+        scorer = self.model.scorer(searcher, fieldname, text, qf=qf)
+        return self.DebugScorer(searcher, fieldname, text, self.log, scorer)
 
+    class DebugScorer(WrappingScorer):
+        def __init__(self, searcher, fieldname, text, log, child):
+            self.child = child
+            self.searcher = searcher
+            self.fieldname = fieldname
+            self.text = text
+            self.log = log
 
-class DebugScorer(BaseScorer):
-    def __init__(self, searcher, fieldname, text, log):
-        ti = searcher.term_info(fieldname, text)
-        self.max_quality = ti.max_weight()
+        def score(self, matcher):
+            fieldname, text = self.fieldname, self.text
+            docid = matcher.id()
+            w = matcher.weight()
+            length = self.searcher.doc_field_length(docid, fieldname)
 
-        self.searcher = searcher
-        self.fieldname = fieldname
-        self.text = text
-        self.log = log
-
-    def supports_block_quality(self):
-        return True
-
-    def score(self, matcher):
-        fieldname, text = self.fieldname, self.text
-        docid = matcher.id()
-        w = matcher.weight()
-        length = self.searcher.doc_field_length(docid, fieldname)
-        self.log.append((fieldname, text, docid, w, length))
-        return w
-
-    def block_quality(self, matcher):
-        return matcher.block_max_weight()
+            s = self.child.score(matcher)
+            self.log.append((fieldname, text, docid, w, length, s))
+            return s
 
 
 # BM25F Model
 
     def scorer(self, searcher, fieldname, text, qf=1):
         if not searcher.schema[fieldname].scorable:
-            return WeightScorer.for_(searcher, fieldname, text)
+            return WeightScorer(searcher, fieldname, text)
 
         if fieldname in self._field_B:
             B = self._field_B[fieldname]
         self.qf = qf
         self.setup(searcher, fieldname, text)
 
-    def _score(self, weight, length):
+    def score_weight_length(self, weight, length):
         s = bm25(self.idf, weight, length, self.avgfl, self.B, self.K1)
         return s
 
 
     def scorer(self, searcher, fieldname, text, qf=1):
         if not searcher.schema[fieldname].scorable:
-            return WeightScorer.for_(searcher, fieldname, text)
+            return WeightScorer(searcher, fieldname, text)
 
         return DFreeScorer(searcher, fieldname, text, qf=qf)
 
         self.qf = qf
         self.setup(searcher, fieldname, text)
 
-    def _score(self, weight, length):
+    def score_weight_length(self, weight, length):
         return dfree(weight, self.cf, self.qf, length, self.fl)
 
 
 
     def scorer(self, searcher, fieldname, text, qf=1):
         if not searcher.schema[fieldname].scorable:
-            return WeightScorer.for_(searcher, fieldname, text)
+            return WeightScorer(searcher, fieldname, text)
 
         return PL2Scorer(searcher, fieldname, text, self.c, qf=qf)
 
         self.qf = qf
         self.setup(searcher, fieldname, text)
 
-    def _score(self, weight, length):
+    def score_weight_length(self, weight, length):
         return pl2(weight, self.cf, self.qf, self.dc, length, self.avgfl,
                    self.c)
 
 # Simple models
 
 class Frequency(WeightingModel):
+    """Simple weighting model that just uses the term frequency as the score.
+    """
+
+    def scorer(self, searcher, fieldname, text, qf=1):
+        return WeightScorer(searcher, fieldname, text)
+
+
+class TF_IDF(WeightingModel):
+    """Simple weighting model that uses term frequency * inverse document
+    frequency.
+    """
+
     def scorer(self, searcher, fieldname, text, qf=1):
         maxweight = searcher.term_info(fieldname, text).max_weight()
-        return WeightScorer(maxweight)
 
-
-class TF_IDF(WeightingModel):
-    def scorer(self, searcher, fieldname, text, qf=1):
         # IDF is a global statistic, so get it from the top-level searcher
         parent = searcher.get_parent()  # Returns self if no parent
         idf = parent.idf(fieldname, text)
 
-        maxweight = searcher.term_info(fieldname, text).max_weight()
-        return TF_IDFScorer(maxweight, idf)
+        return self.TF_IDFScorer(maxweight, idf)
 
+    class TF_IDFScorer(BaseScorer):
+        def __init__(self, maxweight, idf):
+            self.max_quality = maxweight * idf
+            self.idf = idf
 
-class TF_IDFScorer(BaseScorer):
-    def __init__(self, maxweight, idf):
-        self.max_quality = maxweight * idf
-        self.idf = idf
+        def supports_block_quality(self):
+            return True
 
-    def supports_block_quality(self):
-        return True
+        def score(self, matcher):
+            return matcher.weight() * self.idf
 
-    def score(self, matcher):
-        return matcher.weight() * self.idf
+        def score_weight_length(self, weight, length):
+            return weight * self.idf
 
-    def block_quality(self, matcher):
-        return matcher.block_max_weight() * self.idf
+        def block_quality(self, matcher):
+            return matcher.block_max_weight() * self.idf
 
 
 # Utility models
         subscorer = self.weighting.scorer(searcher, fieldname, text, qf=qf)
         return ReverseWeighting.ReverseScorer(subscorer)
 
-    class ReverseScorer(BaseScorer):
-        def __init__(self, subscorer):
-            self.subscorer = subscorer
-            self.max_quality = 0 - subscorer.max_quality
-
-        def supports_block_quality(self):
-            return self.subscorer.supports_block_quality()
+    class ReverseScorer(WrappingScorer):
+        @property
+        def max_quality(self):
+            return 0 - self.child.max_quality
 
         def score(self, matcher):
-            return 0 - self.subscorer.score(matcher)
+            return 0 - self.child.score(matcher)
+
+        def score_weight_length(self, weight, length):
+            return 0 - self.child.score_weight_length(weight, length)
 
         def block_quality(self, matcher):
             return 0 - self.subscorer.block_quality(matcher)

File src/whoosh/searching.py

 
         return self.weighting.scorer(self, fieldname, text, qf=qf)
 
-    def postings(self, fieldname, text, qf=1):
+    def postings(self, fieldname, text, weighting=None, qf=1):
         """Returns a :class:`whoosh.matching.Matcher` for the postings of the
         given term. Unlike the :func:`whoosh.reading.IndexReader.postings`
         method, this method automatically sets the scoring functions on the
         matcher from the searcher's weighting object.
         """
 
-        scorer = self.scorer(fieldname, text, qf=qf)
+        weighting = weighting or self.weighting
+        scorer = weighting.scorer(self, fieldname, text, qf=qf)
         return self.ixreader.postings(fieldname, text, scorer=scorer)
 
     def idf(self, fieldname, text):
 
     def search(self, q, limit=10, sortedby=None, reverse=False, groupedby=None,
                optimize=True, filter=None, mask=None, terms=False,
-               maptype=None):
+               maptype=None, weighting=None):
         """Runs the query represented by the ``query`` object and returns a
         Results object.
         
             return collector.sort(self, q, sortedby, reverse=reverse,
                                   allow=filter, restrict=mask)
         else:
-            return collector.search(self, q, allow=filter, restrict=mask)
+            return collector.search(self, q, allow=filter, restrict=mask,
+                                    weighting=weighting)
 
     def correct_query(self, q, qstring, correctors=None, allfields=False,
                       terms=None, prefix=0, maxdist=2):
             self.timer.start()
 
     def _reset(self):
+        self.skipped = 0
         self.facetmaps = {}
         self.items = []
         self.timedout = False
         self.runtime = -1
         self.minscore = None
+        self.weighting = None
         if self.facets:
+            # Call the .map() method on each facet to get a mapping object for
+            # each facet, which is used to keep track of facet groupings. The
+            # map() method takes a default mapping type as an argument, which
+            # will be used if the facet type doesn't have its own mapping type
+            # explicitly set.
             self.facetmaps = dict((facetname, facet.map(self.maptype))
                                   for facetname, facet in self.facets.items())
         else:
                     key = catter.key_to_name(catter.key_for_id(id))
                     add(key, offsetid, sortkey)
 
-    def search(self, searcher, q, allow=None, restrict=None):
+    def search(self, searcher, q, allow=None, restrict=None, weighting=None):
         """Top-level method call which uses the given :class:`Searcher` and
         :class:`whoosh.query.Query` objects to return a :class:`Results`
         object.
         """
 
         self.searcher = searcher
+        self.weighting = weighting
         self.q = q
         self._set_filters(allow, restrict)
         self._reset()
         replacecounter = 0
         timelimited = bool(self.timelimit)
 
-        matcher = q.matcher(self.subsearcher)
+        matcher = q.matcher(self.subsearcher, weighting=self.weighting)
         usequality = self.use_block_quality(self.subsearcher, matcher)
 
         termlists = self.termlists
             # flag is true, try to skip ahead to the next block with the
             # minimum required quality
             if usequality and checkquality and minscore is not None:
-                matcher.skip_to_quality(minscore)
+                self.skipped += matcher.skip_to_quality(minscore)
                 # Skipping ahead might have moved the matcher to the end of the
                 # posting list
                 if not matcher.is_active():

File src/whoosh/spans.py

     wrapped query, and ``matcher()`` to return a span-aware matcher object.
     """
 
-    def _subm(self, s):
-        return self.q.matcher(s)
+    def _subm(self, s, weighting=None):
+        return self.q.matcher(s, weighting=weighting)
 
     def __getattr__(self, name):
         return super(Query, self).__getattr(self.q, name)
     def apply(self, fn):
         return self.__class__(fn(self.q), limit=self.limit)
 
-    def matcher(self, searcher):
-        return SpanFirst.SpanFirstMatcher(self._subm(searcher),
-                                          limit=self.limit)
+    def matcher(self, searcher, weighting=None):
+        m = self._subm(searcher, weighting=weighting)
+        return SpanFirst.SpanFirstMatcher(m, limit=self.limit)
 
     class SpanFirstMatcher(SpanWrappingMatcher):
         def __init__(self, child, limit=0):
         return self.__class__(fn(self.a), fn(self.b), slop=self.slop,
                               ordered=self.ordered, mindist=self.mindist)
 
-    def matcher(self, searcher):
-        ma = self.a.matcher(searcher)
-        mb = self.b.matcher(searcher)
+    def matcher(self, searcher, weighting=None):
+        ma = self.a.matcher(searcher, weighting=weighting)
+        mb = self.b.matcher(searcher, weighting=weighting)
         return SpanNear.SpanNearMatcher(ma, mb, slop=self.slop,
                                         ordered=self.ordered,
                                         mindist=self.mindist)
     def apply(self, fn):
         return self.__class__([fn(sq) for sq in self.subqs])
 
-    def matcher(self, searcher):
-        matchers = [q.matcher(searcher) for q in self.subqs]
+    def matcher(self, searcher, weighting=None):
+        matchers = [q.matcher(searcher, weighting=weighting)
+                    for q in self.subqs]
         return make_binary_tree(SpanOr.SpanOrMatcher, matchers)
 
     class SpanOrMatcher(SpanBiMatcher):
     def apply(self, fn):
         return self.__class__(fn(self.a), fn(self.b))
 
-    def matcher(self, searcher):
-        ma = self.a.matcher(searcher)
-        mb = self.b.matcher(searcher)
+    def matcher(self, searcher, weighting=None):
+        ma = self.a.matcher(searcher, weighting=weighting)
+        mb = self.b.matcher(searcher, weighting=weighting)
         return self._Matcher(ma, mb)
 
 

File src/whoosh/writing.py

 from __future__ import with_statement
 import threading
 import time
+from contextlib import contextmanager
 
 from whoosh.store import LockError
 from whoosh.util import abstractmethod, synchronized
     pass
 
 
+# Support for grouping hierarchical documents
+
+@contextmanager
+def groupmanager(writer):
+    writer.start_group()
+    yield
+    writer.end_group()
+
+
 # Base class
 
 class IndexWriter(object):
         else:
             self.commit()
 
+    def group(self):
+        """Returns a context manager that calls
+        :meth:`~IndexWriter.start_group` and :meth:`~IndexWriter.end_group` for
+        you, allowing you to use a ``with`` statement to group hierarchical
+        documents::
+        
+            with myindex.writer() as w:
+                with w.group():
+                    w.add_document(kind="class", name="Accumulator")
+                    w.add_document(kind="method", name="add")
+                    w.add_document(kind="method", name="get_result")
+                    w.add_document(kind="method", name="close")
+                
+                with w.group():
+                    w.add_document(kind="class", name="Calculator")
+                    w.add_document(kind="method", name="add")
+                    w.add_document(kind="method", name="multiply")
+                    w.add_document(kind="method", name="get_result")
+                    w.add_document(kind="method", name="close")
+        """
+
+        return groupmanager(self)
+
+    def start_group(self):
+        """Start indexing a group of hierarchical documents. The backend should
+        ensure that these documents are all added to the same segment::
+        
+            with myindex.writer() as w:
+                w.start_group()
+                w.add_document(kind="class", name="Accumulator")
+                w.add_document(kind="method", name="add")
+                w.add_document(kind="method", name="get_result")
+                w.add_document(kind="method", name="close")
+                w.end_group()
+                
+                w.start_group()
+                w.add_document(kind="class", name="Calculator")
+                w.add_document(kind="method", name="add")
+                w.add_document(kind="method", name="multiply")
+                w.add_document(kind="method", name="get_result")
+                w.add_document(kind="method", name="close")
+                w.end_group()
+        
+        A more convenient way to group documents is to use the
+        :meth:`~IndexWriter.group` method and the ``with`` statement.
+        """
+
+        pass
+
+    def end_group(self):
+        """Finish indexing a group of hierarchical documents. See
+        :meth:`~IndexWriter.start_group`.
+        """
+
+        pass
+
     def add_field(self, fieldname, fieldtype, **kwargs):
         """Adds a field to the index's schema.
         
         # Check which of the supplied fields are unique
         unique_fields = [name for name, field in self.schema.items()
                          if name in fields and field.unique]
-        if not unique_fields:
-            raise IndexingError("None of the fields in %r"
-                                " are unique" % list(fields.keys()))
         return unique_fields
 
     def update_document(self, **fields):
         # Add the given fields
         self.add_document(**fields)
 
-    def add_document_group(self, docs):
-        for doc in docs:
-            self.add_document(**doc)
-
-    def update_document_group(self, docs):
-        for doc in docs:
-            self.update_document(**doc)
-
     def commit(self):
         """Finishes writing and unlocks the index.
         """

File tests/test_indexing.py

         r = s.search(query.Term("a", "alfa"))
         assert_equal([hit["id"] for hit in r], [1, 0, 3, 2])
 
-def test_nested():
-    schema = fields.Schema(name=fields.ID(stored=True), type=fields.ID,
-                           part=fields.ID, price=fields.NUMERIC)
-    ix = RamStorage().create_index(schema)
-    with ix.writer() as w:
-        w.add_document(name=u("iPad"), type=u("product"))
-        w.add_document(part=u("screen"), price=100)
-        w.add_document(part=u("battery"), price=50)
-        w.add_document(part=u("case"), price=20)
 
-        w.add_document(name=u("iPhone"), type=u("product"))
-        w.add_document(part=u("screen"), price=60)
-        w.add_document(part=u("battery"), price=30)
-        w.add_document(part=u("case"), price=10)
 
-        w.add_document(name=u("Mac mini"), type=u("product"))
-        w.add_document(part=u("hard drive"), price=50)
-        w.add_document(part=u("case"), price=50)
 
-    with ix.searcher() as s:
-        price = s.schema["price"]
 
-        pq = query.Term("type", "product")
-        cq = query.Term("price", price.to_text(50))
-        q = query.NestedDocument(pq, cq)
 
-        r = s.search(q)
-        assert_equal(sorted([hit["name"] for hit in r]), ["Mac mini", "iPad"])
 
 
 
 
 
 
-
-
-
-