Commits

Matt Chaput committed 10bf923

Added support for grouped indexing and nested document queries.
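
In brief: ``IndexWriter`` gains ``group()``/``start_group()``/``end_group()`` methods that keep hierarchically related documents together in the index, and the new ``query.Nested`` maps matching "child" documents back to their "parent" documents. A minimal sketch of the new API, modeled on the tests below (the schema and field names are illustrative):

    from whoosh import fields, index, query
    from whoosh.compat import u

    schema = fields.Schema(name=fields.ID(stored=True), type=fields.ID,
                           part=fields.ID)
    ix = index.create_in("indexdir", schema)

    with ix.writer() as w:
        with w.group():  # parent first, then its children
            w.add_document(name=u("iPad"), type=u("product"))
            w.add_document(part=u("screen"))
            w.add_document(part=u("battery"))

    with ix.searcher() as s:
        # Return the *product* whose *parts* match the sub-query
        q = query.Nested(query.Term("type", "product"),
                         query.Term("part", "battery"))
        for hit in s.search(q):
            print hit["name"]  # -> iPad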


Files changed (8)

src/whoosh/filedb/multiproc.py

 
 from whoosh.compat import xrange, iteritems, pickle
 from whoosh.codec import base
-from whoosh.filedb.filewriting import SegmentWriter
-from whoosh.support.externalsort import imerge, SortingPool
+from whoosh.filedb.filewriting import PostingPool, SegmentWriter
+from whoosh.support.externalsort import imerge
 
 
 def finish_subsegment(writer, k=64):
         # A buffer for documents before they are flushed to a job file
         self.docbuffer = []
 
+        self._grouping = 0
         self._added_sub = False
 
     def _new_task(self):
         finally:
             SegmentWriter.cancel(self)
 
+    def start_group(self):
+        self._grouping += 1
+
+    def end_group(self):
+        if not self._grouping:
+            raise Exception("Unbalanced end_group")
+        self._grouping -= 1
+
     def add_document(self, **fields):
         # Add the document to the docbuffer
         self.docbuffer.append((0, fields))
         # If the buffer is full, flush it to the job queue
-        if len(self.docbuffer) >= self.batchsize:
+        if not self._grouping and len(self.docbuffer) >= self.batchsize:
             self._enqueue()
         self._added_sub = True
 
         # Note that PostingPool._read_run() automatically deletes the run file
         # when it's finished
 
-        gen = SortingPool._read_run(path)
+        gen = PostingPool._read_run(path)
         # If offset is 0, just return the items unchanged
         if not offset:
             return gen
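
Note the new guard in ``add_document`` above: while a group is open, the docbuffer may grow past ``batchsize``, so a group of related documents is never split across job files (and therefore never across sub-segments). A standalone sketch of the counter/buffer interplay (not the writer itself), assuming ``batchsize=2``:

    docbuffer = []
    grouping = 0
    batchsize = 2

    def add(doc):
        docbuffer.append(doc)
        if not grouping and len(docbuffer) >= batchsize:
            del docbuffer[:]  # stands in for flushing to the job queue

    grouping += 1             # start_group()
    for doc in ("parent", "child1", "child2"):
        add(doc)
    grouping -= 1             # end_group()
    assert len(docbuffer) == 3  # the group was held together past batchsize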

src/whoosh/query.py

 from whoosh.compat import u, text_type, bytes_type
 from whoosh.lang.morph_en import variations
 from whoosh.reading import TermNotFound
-from whoosh.support.bitvector import BitSet, SortedIntSet
 from whoosh.support.times import datetime_to_long
 from whoosh.util import make_binary_tree, make_weighted_tree, methodcaller
 
         return m
 
 
+class Nested(WrappingQuery):
+    """A query that allows you to search for "nested" documents. You index
+    (possibly multiple levels of) "parent" and "child" documents using the
+    :meth:`~whoosh.writing.IndexWriter.group` and/or
+    :meth:`~whoosh.writing.IndexWriter.start_group` methods of a
+    :class:`whoosh.writing.IndexWriter` to indicate that hierarchically
+    related documents should be kept together::
+    
+        schema = fields.Schema(type=fields.ID, text=fields.TEXT(stored=True))
+    
+        with ix.writer() as w:
+            # Say we're indexing chapters (type=chap) and each chapter has a
+            # number of paragraphs (type=p)
+            with w.group():
+                w.add_document(type="chap", text="Chapter 1")
+                w.add_document(type="p", text="Able baker")
+                w.add_document(type="p", text="Bright morning")
+            with w.group():
+                w.add_document(type="chap", text="Chapter 2")
+                w.add_document(type="p", text="Car trip")
+                w.add_document(type="p", text="Dog eared")
+                w.add_document(type="p", text="Every day")
+            with w.group():
+                w.add_document(type="chap", text="Chapter 3")
+                w.add_document(type="p", text="Fine day")
+                
+    
+    The ``Nested`` query wraps two sub-queries: the "parent query" matches a
+    class of "parent documents". The "sub query" matches nested documents you
+    want to find. For each "sub document" the "sub query" finds, this query
+    acts as if it found the corresponding "parent document".
+    
+    >>> with ix.searcher() as s:
+    ...   q = query.Nested(query.Term("type", "chap"),
+    ...                    query.Term("text", "day"))
+    ...   r = s.search(q)
+    ...   for hit in r:
+    ...     print hit["text"]
+    ...
+    Chapter 2
+    Chapter 3
+    """
+
+    def __init__(self, parentq, subq, per_parent_limit=None, score_fn=sum):
+        """
+        :param parentq: a query matching the documents you want to use as the
+            "parent" documents. Where the sub-query matches, the corresponding
+            document in these results will be returned as the match.
+        :param subq: a query matching the information you want to find.
+        :param per_parent_limit: a maximum number of "sub documents" to search
+            per parent. The default is None, meaning no limit.
+        :param score_fn: a function to use to combine the scores of matching
+            sub-documents to calculate the score returned for the parent
+            document. The default is ``sum``, that is, add up the scores of the
+            sub-documents.
+        """
+
+        self.parentq = parentq
+        self.child = subq
+        self.per_parent_limit = per_parent_limit
+        self.score_fn = score_fn
+
+    def normalize(self):
+        p = self.parentq.normalize()
+        q = self.child.normalize()
+
+        if p is NullQuery or q is NullQuery:
+            return NullQuery
+
+        return self.__class__(p, q, per_parent_limit=self.per_parent_limit,
+                              score_fn=self.score_fn)
+
+    def requires(self):
+        return self.child.requires()
+
+    def matcher(self, searcher, weighting=None):
+        from whoosh.support.bitvector import BitSet, SortedIntSet
+
+        pm = self.parentq.matcher(searcher)
+        if not pm.is_active():
+            return matching.NullMatcher()
+
+        bits = BitSet(searcher.doc_count_all(), pm.all_ids())
+        #bits = SortedIntSet(pm.all_ids())
+        m = self.child.matcher(searcher, weighting=weighting)
+        return self.NestedMatcher(bits, m, self.per_parent_limit,
+                                  searcher.doc_count_all(), self.score_fn)
+
+    class NestedMatcher(matching.Matcher):
+        def __init__(self, comb, child, per_parent_limit, maxdoc,
+                     score_fn=sum):
+            self.comb = comb
+            self.child = child
+            self.per_parent_limit = per_parent_limit
+            self.maxdoc = maxdoc
+            self.score_fn = score_fn
+
+            self._nextdoc = None
+            if self.child.is_active():
+                self._gather()
+
+        def is_active(self):
+            return self._nextdoc is not None
+
+        def supports_block_quality(self):
+            return False
+
+        def _gather(self):
+            # This is where the magic happens ;)
+            child = self.child
+            pplimit = self.per_parent_limit
+
+            # The next document returned by this matcher is the parent of the
+            # child's current document. We don't have to worry about whether
+            # the parent is deleted, because the query that gave us the parents
+            # wouldn't return deleted documents.
+            self._nextdoc = self.comb.before(child.id() + 1)
+            # The next parent after the child matcher's current document
+            nextparent = self.comb.after(child.id()) or self.maxdoc
+
+            # Collect the scores of all matching documents under the parent
+            # and combine them with score_fn (sum by default)
+            count = 1
+            scores = []
+            while child.is_active() and child.id() < nextparent:
+                if pplimit and count > pplimit:
+                    child.skip_to(nextparent)
+                    break
+
+                scores.append(child.score())
+                child.next()
+                count += 1
+
+            self._nextscore = self.score_fn(scores)
+
+        def id(self):
+            return self._nextdoc
+
+        def score(self):
+            return self._nextscore
+
+        def reset(self):
+            self.child.reset()
+            if self.child.is_active():
+                self._gather()
+            else:
+                self._nextdoc = None
+
+        def next(self):
+            if self.child.is_active():
+                self._gather()
+            else:
+                if self._nextdoc is None:
+                    from whoosh.matching import ReadTooFar
+
+                    raise ReadTooFar
+                else:
+                    self._nextdoc = None
+
+        def skip_to(self, id):
+            self.child.skip_to(id)
+            if self.child.is_active():
+                self._gather()
+            else:
+                self._nextdoc = None
+
+        def value(self):
+            raise NotImplementedError(self.__class__)
+
+
 def BooleanQuery(required, should, prohibited):
     return AndNot(AndMaybe(And(required), Or(should)),
                   Or(prohibited)).normalize()
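
``Nested.matcher`` above implements the usual block-join attribution: each matching child docid is mapped to the nearest preceding parent docid via ``comb.before(child.id() + 1)``, and child scores under the same parent are combined. A standalone sketch of that attribution step, using a plain sorted list where the matcher uses a ``BitSet``:

    from bisect import bisect_right

    def parent_of(parents, child_id):
        # Nearest parent docid at or before child_id (parents is sorted);
        # equivalent to comb.before(child_id + 1)
        pos = bisect_right(parents, child_id)
        return parents[pos - 1] if pos else None

    parents = [0, 4, 7]        # docids matching the parent query
    child_hits = [2, 3, 5, 9]  # docids matching the sub-query
    assert [parent_of(parents, c) for c in child_hits] == [0, 0, 4, 7]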

src/whoosh/searching.py

 from whoosh import classify, highlight, query, scoring, sorting
 from whoosh.compat import iteritems, itervalues, iterkeys, xrange
 from whoosh.reading import TermNotFound
-from whoosh.support.bitvector import BitSet, BitVector
+from whoosh.support.bitvector import DocIdSet, BitSet
 from whoosh.util import now, lru_cache
 
 
     def _filter_to_comb(self, obj):
         if obj is None:
             return None
-        if isinstance(obj, (set, BitVector, BitSet)):
+        if isinstance(obj, (set, DocIdSet)):
             c = obj
         elif isinstance(obj, Results):
             c = obj.docset
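
With ``_filter_to_comb`` accepting any ``DocIdSet``, a precomputed id set can be passed directly as a result filter. A small sketch, assuming a searcher ``s`` and a query ``q`` are already in hand:

    from whoosh.support.bitvector import SortedIntSet

    # Only return hits whose document numbers are in this set
    allowed = SortedIntSet([0, 4, 7])
    results = s.search(q, filter=allowed)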

src/whoosh/support/bench.py

 from optparse import OptionParser
 from shutil import rmtree
 
-from whoosh import index, qparser, query
+from whoosh import index, qparser, query, scoring
 from whoosh.util import now, find_object
 
 try:
         if self.options.pool:
             poolclass = find_object(self.options.pool)
 
-        kwargs = dict(limitmb=int(self.options.limitmb), poolclass=poolclass,
-                      dir=self.options.tempdir, procs=int(self.options.procs),
-                      batchsize=int(self.options.batch))
-
-        if self.options.expw:
-            from whoosh.filedb.multiproc import MultiSegmentWriter
-            self.writer = MultiSegmentWriter(ix, **kwargs)
-        else:
-            self.writer = ix.writer(**kwargs)
-
+        self.writer = ix.writer(limitmb=int(self.options.limitmb),
+                                poolclass=poolclass,
+                                dir=self.options.tempdir,
+                                procs=int(self.options.procs),
+                                batchsize=int(self.options.batch),
+                                multisegment=self.options.xms)
         self._procdoc = None
         if hasattr(self.bench.spec, "process_document_whoosh"):
             self._procdoc = self.bench.spec.process_document_whoosh
         path = os.path.join(self.options.dir, "%s_whoosh"
                             % self.options.indexname)
         ix = index.open_dir(path)
-        self.srch = ix.searcher()
+        self.srch = ix.searcher(weighting=scoring.PL2())
         self.parser = qparser.QueryParser(self.bench.spec.main_field,
                                           schema=ix.schema)
 
         return self.parser.parse(qstring)
 
     def find(self, q):
-        return self.srch.search(q, limit=int(self.options.limit))
+        return self.srch.search(q, limit=int(self.options.limit),
+                                optimize=self.options.optimize)
 
     def findterms(self, terms):
         limit = int(self.options.limit)
                      help="Whoosh temp dir", default=None)
         p.add_option("-P", "--pool", dest="pool", metavar="CLASSNAME",
                      help="Whoosh pool class", default=None)
-        p.add_option("-X", "--expw", dest="expw", action="store_true",
-                     help="Use experimental whoosh writer", default=False)
+        p.add_option("-X", "--xms", dest="xms", action="store_true",
+                     help="Experimental Whoosh feature", default=False)
         p.add_option("-Z", "--storebody", dest="storebody", action="store_true",
                      help="Store the body text in index", default=False)
         p.add_option("-q", "--snippets", dest="snippets", action="store_true",
                      help="Show highlighted snippets", default=False)
+        p.add_option("-O", "--no-optimize", dest="optimize", action="store_false",
+                     help="Turn off searcher optimization", default=True)
 
         return p
 

src/whoosh/support/bitvector.py

 
 import operator
 from array import array
+from bisect import bisect_left, bisect_right
 
 from whoosh.compat import xrange
 
-#: Table of the number of '1' bits in each byte (0-255)
-BYTE_COUNTS = array('B', [
-    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8])
 
+# Number of '1' bits in each byte (0-255)
+_1SPERBYTE = array('B', [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2,
+2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,
+3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3,
+3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5,
+5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4,
+3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
+5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,
+3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4,
+4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7,
+6, 7, 7, 8])
 
-class BitVector(object):
+
+def _inverted_list(a, size):
+    # Yield the integers in [0, size) that are NOT in the sorted sequence a
+    pos = 0
+    for x in xrange(size):
+        if pos < len(a) and a[pos] == x:
+            pos += 1
+        else:
+            yield x
+
+
+class DocIdSet(object):
+    """Base class for a set of positive integers, implementing a subset of the
+    built-in ``set`` type's interface with extra docid-related methods.
+
+    This is a superclass for alternative set implementations to the built-in
+    ``set`` which are more memory-efficient and specialized toward storing
+    sorted lists of positive integers, though they will inevitably be slower
+    than ``set`` for most operations since they're pure Python.
     """
-    Implements a memory-efficient array of bits.
-    
-    >>> bv = BitVector(10)
-    >>> bv
-    <BitVector 0000000000>
-    >>> bv[5] = True
-    >>> bv
-    <BitVector 0000010000>
-    
-    You can initialize the BitVector using an iterable of integers representing
-    bit positions to turn on.
-    
-    >>> bv2 = BitVector(10, [2, 4, 7])
-    >>> bv2
-    <BitVector 00101001000>
-    >>> bv[2]
-    True
-    
-    BitVector supports bit-wise logic operations & (and), | (or), and ^ (xor)
-    between itself and another BitVector of equal size, or itself and a
-    collection of integers (usually a set() or frozenset()).
-    
-    >>> bv | bv2
-    <BitVector 00101101000>
-    
-    Note that ``BitVector.__len__()`` returns the number of "on" bits, not
-    the size of the bit array. This is to make BitVector interchangeable with
-    a set()/frozenset() of integers. To get the size, use BitVector.size.
+
+    def __eq__(self, other):
+        if len(self) != len(other):
+            return False
+        # Both sets iterate in sorted order, so pairwise comparison works
+        for a, b in zip(self, other):
+            if a != b:
+                return False
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __len__(self):
+        raise NotImplementedError
+
+    def __iter__(self):
+        raise NotImplementedError
+
+    def __contains__(self, i):
+        raise NotImplementedError
+
+    def copy(self):
+        raise NotImplementedError
+
+    def add(self, i):
+        raise NotImplementedError
+
+    def discard(self, i):
+        raise NotImplementedError
+
+    def update(self, other):
+        raise NotImplementedError
+
+    def union(self, other):
+        raise NotImplementedError
+
+    def intersection(self, other):
+        raise NotImplementedError
+
+    def difference(self, other):
+        raise NotImplementedError
+
+    def intersection_update(self, other):
+        raise NotImplementedError
+
+    def difference_update(self, other):
+        raise NotImplementedError
+
+    def invert(self, size):
+        """Returns a new instance containing the numbers in the range
+        ``[0, size)`` that are not in this set.
+        """
+
+        raise NotImplementedError
+
+    def invert_update(self, size):
+        """Updates the set in place to contain the numbers in the range
+        ``[0, size)`` that are not currently in this set.
+        """
+
+        raise NotImplementedError
+
+    def before(self, i):
+        """Returns the previous integer in the set before ``i``, or None.
+        """
+
+        raise NotImplementedError
+
+    def after(self, i):
+        """Returns the next integer in the set after ``i``, or None.
+        """
+
+        raise NotImplementedError
+
+    def cursor(self):
+        """Returns a :class:`DocIdCursor` for this set.
+        """
+
+        raise NotImplementedError
+
+
+class BitSet(DocIdSet):
+    """A DocIdSet backed by an array of bits. This can also be useful as a bit
+    array (e.g. for a Bloom filter). It is much more memory efficient than a
+    large built-in set of integers, but wastes memory for sparse sets.
     """
 
-    def __init__(self, size, source=None, bits=None):
-        self.size = size
+    def __init__(self, size=0, source=None, bits=None):
+        """
+        :param size: the initial size of the bit array, in bits. The array
+            grows as needed when integers are added.
+        :param source: an iterable of positive integers to add to this set.
+        :param bits: an array of unsigned bytes ("B") to use as the underlying
+            bit array. This is used mainly by ``copy()``.
+        """
 
         if bits:
             self.bits = bits
         else:
-            self.bits = array("B", ([0x00] * ((size >> 3) + 1)))
+            self.bits = array("B", (0 for _ in xrange(size // 8 + 1)))
 
         if source:
-            set = self.set
+            add = self.add
             for num in source:
-                set(num)
+                add(num)
 
-        self.bcount = None
+    def size(self):
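+        # Memory footprint of the underlying array, in bytes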
+        return len(self.bits)
 
-    def __eq__(self, other):
-        if isinstance(other, BitVector):
-            return self.bits == other.bits
-        return False
+    def copy(self):
+        return self.__class__(bits=array("B", self.bits))
+
+    def _trim(self):
+        bits = self.bits
+        last = len(self.bits) - 1
+        while last >= 0 and not bits[last]:
+            last -= 1
+        del self.bits[last + 1:]
+
+    def _resize(self, tosize):
+        curlength = len(self.bits)
+        newlength = tosize // 8 + 1
+        if newlength > curlength:
+            self.bits.extend((0,) * (newlength - curlength))
+        elif newlength < curlength:
+            del self.bits[newlength:]
+
+    def _zero_extra_bits(self, size):
+        bits = self.bits
+        spill = size - (len(bits) - 1) * 8
+        if spill:
+            mask = 2 ** spill - 1
+            bits[-1] = bits[-1] & mask
+
+    def _logic(self, obj, op, other):
+        from whoosh.util import izip_longest
+
+        objbits = obj.bits
+        for i, (byte1, byte2) in enumerate(izip_longest(objbits, other.bits,
+                                                        fillvalue=0)):
+            value = op(byte1, byte2) & 0xFF
+            if i >= len(objbits):
+                objbits.append(value)
+            else:
+                objbits[i] = value
+
+        obj._trim()
+        return obj
 
     def __repr__(self):
-        return "<BitVector %s/%s>" % (len(self), self.size)
+        return "%s(%r)" % (self.__class__.__name__, list(self))
 
     def __len__(self):
         # This returns the count of "on" bits instead of the size to
-        # make BitVector exchangeable with a set() object.
-        return self.count()
-
-    def __contains__(self, index):
-        byte = self.bits[index >> 3]
-        if not byte:
-            return False
-        return byte & (1 << (index & 7)) != 0
+        # make BitSet exchangeable with a set() object.
+        return sum(_1SPERBYTE[b] for b in self.bits)
 
     def __iter__(self):
-        get = self.__getitem__
-        for i in xrange(0, self.size):
-            if get(i):
+        contains = self.__contains__
+        for i in xrange(0, len(self.bits) * 8):
+            if contains(i):
                 yield i
 
-    def __str__(self):
-        get = self.__getitem__
-        return "".join("1" if get(i) else "0"
-                       for i in xrange(0, self.size))
-
     def __nonzero__(self):
-        return self.count() > 0
+        return any(n for n in self.bits)
 
     __bool__ = __nonzero__
 
-    def __getitem__(self, index):
-        return self.bits[index >> 3] & (1 << (index & 7)) != 0
+    def __contains__(self, i):
+        bits = self.bits
+        bucket = i >> 3
+        return bucket < len(bits) and bits[bucket] & (1 << (i & 7))
 
-    def __setitem__(self, index, value):
-        if value:
-            self.set(index)
-        else:
-            self.clear(index)
+    def add(self, i):
+        bucket = i >> 3
+        # Grow the underlying array as needed
+        if bucket >= len(self.bits):
+            self._resize(i + 1)
+        self.bits[bucket] |= 1 << (i & 7)
 
-    def _logic(self, op, bitv):
-        if self.size != bitv.size:
-            raise ValueError("Can't combine bitvectors of different sizes")
-        res = BitVector(size=self.size)
-        lpb = list(map(op, self.bits, bitv.bits))
-        res.bits = array('B', lpb)
-        return res
+    def discard(self, i):
+        bucket = i >> 3
+        if bucket < len(self.bits):
+            self.bits[bucket] &= ~(1 << (i & 7))
+
+    def update(self, iterable):
+        add = self.add
+        for i in iterable:
+            add(i)
 
     def union(self, other):
-        return self.__or__(other)
+        if isinstance(other, BitSet):
+            return self._logic(self.copy(), operator.__or__, other)
+        b = self.copy()
+        b.update(other)
+        return b
 
     def intersection(self, other):
-        return self.__and__(other)
+        if isinstance(other, BitSet):
+            return self._logic(self.copy(), operator.__and__, other)
+        return BitSet(source=(n for n in self if n in other))
 
-    def __and__(self, other):
-        if not isinstance(other, BitVector):
-            other = BitVector(self.size, source=other)
-        return self._logic(operator.__and__, other)
+    def difference(self, other):
+        if isinstance(other, BitSet):
+            return self._logic(self.copy(), lambda x, y: x & ~y, other)
+        return BitSet(source=(n for n in self if n not in other))
 
-    def __or__(self, other):
-        if not isinstance(other, BitVector):
-            other = BitVector(self.size, source=other)
-        return self._logic(operator.__or__, other)
+    def intersection_update(self, other):
+        if isinstance(other, BitSet):
+            return self._logic(self, operator.__and__, other)
+        discard = self.discard
+        for n in self:
+            if n not in other:
+                discard(n)
 
-    def __ror__(self, other):
-        return self.__or__(other)
+    def difference_update(self, other):
+        if isinstance(other, BitSet):
+            return self._logic(self, lambda x, y: x & ~y, other)
+        discard = self.discard
+        for n in other:
+            discard(n)
 
-    def __rand__(self, other):
-        return self.__and__(other)
+    def invert(self, size=None):
+        if size is None:
+            size = len(self.bits) * 8
+        b = self.copy()
+        b.invert_update(size)
+        return b
 
-    def __xor__(self, other):
-        if not isinstance(other, BitVector):
-            other = BitVector(self.size, source=other)
-        return self._logic(operator.__xor__, other)
+    def invert_update(self, size=None):
+        if size is None:
+            size = len(self.bits) * 8
+        bits = self.bits
+        for i in xrange(len(bits)):
+            bits[i] = ~bits[i] & 0xFF
+        # Mask off any bits beyond ``size`` in the last byte
+        self._zero_extra_bits(size)
 
-    def __invert__(self):
-        return BitVector(self.size, source=(x for x in xrange(self.size)
-                                            if x not in self))
+    def before(self, i):
+        bits = self.bits
+        size = len(bits) * 8
+        if i <= 0:
+            return None
+        elif i >= size:
+            i = size - 1
+        else:
+            i -= 1
+        bucket = i // 8
 
-    def count(self):
-        """Returns the number of "on" bits in the bit array."""
+        while i >= 0:
+            byte = bits[bucket]
+            if not byte:
+                bucket -= 1
+                i = bucket * 8 + 7
+                continue
+            if byte & (1 << (i & 7)):
+                return i
+            if i % 8 == 0:
+                bucket -= 1
+            i -= 1
 
-        if self.bcount is None:
-            self.bcount = sum(BYTE_COUNTS[b & 0xFF] for b in self.bits)
-        return self.bcount
+        return None
 
-    def set(self, index):
-        """Turns the bit at the given position on."""
+    def after(self, i):
+        bits = self.bits
+        size = len(bits) * 8
+        if i >= size:
+            return None
+        elif i < 0:
+            i = 0
+        else:
+            i += 1
+        bucket = i // 8
 
-        if index >= self.size:
-            raise IndexError("Position %s greater than the size of the vector"
-                             % repr(index))
-        self.bits[index >> 3] |= 1 << (index & 7)
-        self.bcount = None
+        while i < size:
+            byte = bits[bucket]
+            if not byte:
+                bucket += 1
+                i = bucket * 8
+                continue
+            if byte & (1 << (i & 7)):
+                return i
+            i += 1
+            if i % 8 == 0:
+                bucket += 1
 
-    def clear(self, index):
-        """Turns the bit at the given position off."""
+        return None
 
-        self.bits[index >> 3] &= ~(1 << (index & 7))
-        self.bcount = None
 
-    def update(self, iterable):
-        """Takes an iterable of integers representing positions, and turns
-        on the bits at those positions.
-        """
+class SortedIntSet(DocIdSet):
+    """A DocIdSet backed by a sorted array of integers.
+    """
 
-        set = self.set
-        for index in iterable:
-            set(index)
+    def __init__(self, source=None):
+        if source:
+            self.data = array("I", sorted(source))
+        else:
+            self.data = array("I")
 
     def copy(self):
-        """Returns a copy of this BitArray."""
+        sis = SortedIntSet()
+        sis.data = array("I", self.data)
+        return sis
 
-        return BitVector(self.size, bits=self.bits)
-
-
-class BitSet(object):
-    """A set-like object for holding positive integers. It is dynamically
-    backed by either a set or BitVector depending on how many numbers are in
-    the set.
-    
-    Provides ``add``, ``remove``, ``union``, ``intersection``,
-    ``__contains__``, ``__len__``, ``__iter__``, ``__and__``, ``__or__``, and
-    methods.
-    """
-
-    def __init__(self, size, source=None):
-        self.size = size
-
-        self._back = ()
-        self._switch(size < 256)
-
-        if source:
-            for num in source:
-                self.add(num)
-
-    def _switch(self, toset):
-        if toset:
-            self._back = set(self._back)
-            self.add = self._set_add
-            self.remove = self._back.remove
-        else:
-            self._back = BitVector(self.size, source=self._back)
-            self.add = self._back.set
-            self.remove = self._vec_remove
-
-        self.update = self._back.update
-
-    def __contains__(self, n):
-        return n in self._back
+    def size(self):
+        return len(self.data) * self.data.itemsize
 
     def __repr__(self):
-        return "<%s %s/%s>" % (self.__class__.__name__, len(self._back),
-                               self.size)
+        return "%s(%r)" % (self.__class__.__name__, self.data)
 
     def __len__(self):
-        return len(self._back)
+        return len(self.data)
 
     def __iter__(self):
-        return self._back.__iter__()
+        return iter(self.data)
 
-    def as_set(self):
-        return frozenset(self._back)
+    def __nonzero__(self):
+        return bool(self.data)
+
+    __bool__ = __nonzero__
+
+    def __contains__(self, i):
+        data = self.data
+        if not data:
+            return False
+        pos = bisect_left(data, i)
+        return pos < len(data) and data[pos] == i
+
+    def add(self, i):
+        data = self.data
+        if not data or i > data[-1]:
+            data.append(i)
+        else:
+            mn = data[0]
+            mx = data[-1]
+            if i == mn or i == mx:
+                return
+            elif i < mn:
+                data.insert(0, i)
+            else:
+                pos = bisect_left(data, i)
+                if data[pos] != i:
+                    data.insert(pos, i)
+
+    def discard(self, i):
+        data = self.data
+        pos = bisect_left(data, i)
+        if pos < len(data) and data[pos] == i:
+            data.pop(pos)
+
+    def update(self, other):
+        add = self.add
+        for i in other:
+            add(i)
 
     def union(self, other):
-        return self.__or__(other)
+        sis = self.copy()
+        sis.update(other)
+        return sis
 
     def intersection(self, other):
-        return self.__and__(other)
+        return SortedIntSet((num for num in self if num in other))
 
-    def invert(self):
-        return BitSet(self.size, (x for x in xrange(self.size)
-                                  if x not in self))
+    def difference(self, other):
+        return SortedIntSet((num for num in self if num not in other))
 
-    def __and__(self, other):
-        return BitSet(self.size, self._back.intersection(other))
+    def intersection_update(self, other):
+        self.data = array("I", (num for num in self if num in other))
 
-    def __or__(self, other):
-        return BitSet(self.size, self._back.union(other))
+    def difference_update(self, other):
+        self.data = array("I", (num for num in self if num not in other))
 
-    def __rand__(self, other):
-        return self.__and__(other)
+    def invert(self, size):
+        return SortedIntSet(_inverted_list(self.data, size))
 
-    def __ror__(self, other):
-        return self.__or__(other)
+    def invert_update(self, size):
+        self.data = array("I", _inverted_list(self.data, size))
 
-    def __invert__(self):
-        return self.invert()
+    def before(self, i):
+        data = self.data
+        pos = bisect_left(data, i)
+        if pos < 1:
+            return None
+        else:
+            return data[pos - 1]
 
-    def _set_add(self, num):
-        self._back.add(num)
-        if len(self._back) * 4 > self.size // 8 + 32:
-            self._switch(False)
+    def after(self, i):
+        data = self.data
+        if not data or i >= data[-1]:
+            return None
+        elif i < data[0]:
+            return data[0]
+        pos = bisect_right(data, i)
+        return data[pos]
 
-    def _vec_remove(self, num):
-        self._back.clear(num)
-        if len(self._back) * 4 < self.size // 8 - 32:
-            self._switch(True)
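
A quick sanity check of the shared ``DocIdSet`` semantics, exercising both implementations (the numbers are arbitrary):

    from whoosh.support.bitvector import BitSet, SortedIntSet

    bs = BitSet(20, source=[2, 5, 11])
    assert 5 in bs and 6 not in bs
    assert bs.before(5) == 2 and bs.after(5) == 11
    assert bs.before(2) is None and bs.after(11) is None

    sis = SortedIntSet([2, 5, 11])
    assert list(sis) == [2, 5, 11]
    assert sis.after(6) == 11  # the next member after a non-member
    assert sorted(sis.invert(13)) == [0, 1, 3, 4, 6, 7, 8, 9, 10, 12]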
+
+
+
+

src/whoosh/util.py

             else:
                 return
 
+try:
+    from itertools import izip_longest  # @UnusedImport
+except ImportError:
+    from itertools import chain, izip, repeat
+
+    # This function was only added to itertools in 2.6...
+    def izip_longest(*args, **kwds):
+        fillvalue = kwds.get('fillvalue')
+
+        def sentinel(counter=([fillvalue] * (len(args) - 1)).pop):
+            yield counter()
+
+        fillers = repeat(fillvalue)
+        iters = [chain(it, sentinel(), fillers) for it in args]
+        try:
+            for tup in izip(*iters):
+                yield tup
+        except IndexError:
+            pass
 
 try:
     from operator import methodcaller  # @UnusedImport
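
The fallback above is the standard-library recipe for ``izip_longest`` given in the Python 2.6 documentation. For reference, its behavior on uneven inputs:

    >>> list(izip_longest([1, 2, 3], "ab", fillvalue=0))
    [(1, 'a'), (2, 'b'), (3, 0)]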

src/whoosh/writing.py

 from __future__ import with_statement
 import threading
 import time
+from contextlib import contextmanager
 
 from whoosh.store import LockError
 from whoosh.util import abstractmethod, synchronized
     pass
 
 
+# Document grouping context manager
+
+@contextmanager
+def groupmanager(writer):
+    writer.start_group()
+    try:
+        yield
+    finally:
+        writer.end_group()
+
+
 # Base class
 
 class IndexWriter(object):
     """High-level object for writing to an index.
-    
+
     To get a writer for a particular index, call
     :meth:`~whoosh.index.Index.writer` on the Index object.
-    
+
     >>> writer = myindex.writer()
-    
+
     You can use this object as a context manager. If an exception is thrown
     from within the context it calls :meth:`~IndexWriter.cancel` to clean up
     temporary files, otherwise it calls :meth:`~IndexWriter.commit` when the
     context exits.
-    
+
     >>> with myindex.writer() as w:
     ...     w.add_document(title="First document", content="Hello there.")
     ...     w.add_document(title="Second document", content="This is easy!")
         else:
             self.commit()
 
+    def group(self):
+        """Returns a context manager that calls
+        :meth:`~IndexWriter.start_group` and :meth:`~IndexWriter.end_group` for
+        you, allowing you to use a ``with`` statement to group hierarchical
+        documents::
+        
+            with myindex.writer() as w:
+                with w.group():
+                    w.add_document(kind="class", name="Accumulator")
+                    w.add_document(kind="method", name="add")
+                    w.add_document(kind="method", name="get_result")
+                    w.add_document(kind="method", name="close")
+                
+                with w.group():
+                    w.add_document(kind="class", name="Calculator")
+                    w.add_document(kind="method", name="add")
+                    w.add_document(kind="method", name="multiply")
+                    w.add_document(kind="method", name="get_result")
+                    w.add_document(kind="method", name="close")
+        """
+
+        return groupmanager(self)
+
+    def start_group(self):
+        """Start indexing a group of hierarchical documents. The backend should
+        ensure that these documents are all added to the same segment::
+        
+            with myindex.writer() as w:
+                w.start_group()
+                w.add_document(kind="class", name="Accumulator")
+                w.add_document(kind="method", name="add")
+                w.add_document(kind="method", name="get_result")
+                w.add_document(kind="method", name="close")
+                w.end_group()
+                
+                w.start_group()
+                w.add_document(kind="class", name="Calculator")
+                w.add_document(kind="method", name="add")
+                w.add_document(kind="method", name="multiply")
+                w.add_document(kind="method", name="get_result")
+                w.add_document(kind="method", name="close")
+                w.end_group()
+        
+        A more convenient way to group documents is to use the
+        :meth:`~IndexWriter.group` method and the ``with`` statement.
+        """
+
+        pass
+
+    def end_group(self):
+        """Finish indexing a group of hierarchical documents. See
+        :meth:`~IndexWriter.start_group`.
+        """
+
+        pass
+
     def add_field(self, fieldname, fieldtype, **kwargs):
         """Adds a field to the index's schema.
-        
+
         :param fieldname: the name of the field to add.
         :param fieldtype: an instantiated :class:`whoosh.fields.FieldType`
             object.
         """Deletes any documents containing "term" in the "fieldname" field.
         This is useful when you have an indexed field containing a unique ID
         (such as "pathname") for each document.
-        
+
         :returns: the number of documents deleted.
         """
 
 
     def delete_by_query(self, q, searcher=None):
         """Deletes any documents matching a query object.
-        
+
         :returns: the number of documents deleted.
         """
 
     @abstractmethod
     def add_document(self, **fields):
         """The keyword arguments map field names to the values to index/store::
-        
+
             w = myindex.writer()
             w.add_document(path=u"/a", title=u"First doc", text=u"Hello")
             w.commit()
-        
+
         Depending on the field type, some fields may take objects other than
         unicode strings. For example, NUMERIC fields take numbers, and DATETIME
         fields take ``datetime.datetime`` objects::
-        
+
             from datetime import datetime, timedelta
             from whoosh import index
             from whoosh.fields import *
-            
+
             schema = Schema(date=DATETIME, size=NUMERIC(float), content=TEXT)
             myindex = index.create_in("indexdir", schema)
-            
+
             w = myindex.writer()
             w.add_document(date=datetime.now(), size=5.5, content=u"Hello")
             w.commit()
-        
+
         Instead of a single object (i.e., unicode string, number, or datetime),
         you can supply a list or tuple of objects. For unicode strings, this
         bypasses the field's analyzer. For numbers and dates, this lets you add
         multiple values for the given field::
-        
+
             date1 = datetime.now()
             date2 = datetime(2005, 12, 25)
             date3 = datetime(1999, 1, 1)
             w.add_document(date=[date1, date2, date3], size=[9.5, 10],
                            content=[u"alfa", u"bravo", u"charlie"])
-        
+
         For fields that are both indexed and stored, you can specify an
         alternate value to store using a keyword argument in the form
         "_stored_<fieldname>". For example, if you have a field named "title"
         and you want to index the text "a b c" but store the text "e f g", use
         keyword arguments like this::
-        
+
             writer.add_document(title=u"a b c", _stored_title=u"e f g")
-        
+
         You can boost the weight of all terms in a certain field by specifying
         a ``_<fieldname>_boost`` keyword argument. For example, if you have a
         field named "content", you can double the weight of this document for
         searches in the "content" field like this::
-        
+
             writer.add_document(content="a b c", _content_boost=2.0)
-            
+
         You can boost every field at once using the ``_boost`` keyword. For
         example, to boost fields "a" and "b" by 2.0, and field "c" by 3.0::
-        
+
             writer.add_document(a="alfa", b="bravo", c="charlie",
                                 _boost=2.0, _c_boost=3.0)
-        
+
         Note that some scoring algorithms, including Whoosh's default BM25F,
         do not work with term weights less than 1, so you should generally not
         use a boost factor less than 1.
-        
+
         See also :meth:`Writer.update_document`.
         """
 
 
     def update_document(self, **fields):
         """The keyword arguments map field names to the values to index/store.
-        
+
         This method adds a new document to the index, and automatically deletes
         any documents with the same values in any fields marked "unique" in the
         schema::
-        
+
             schema = fields.Schema(path=fields.ID(unique=True, stored=True),
                                    content=fields.TEXT)
             myindex = index.create_in("index", schema)
-        
+
             w = myindex.writer()
             w.add_document(path=u"/", content=u"Mary had a lamb")
             w.commit()
-            
+
             w = myindex.writer()
             w.update_document(path=u"/", content=u"Mary had a little lamb")
             w.commit()
-            
+
             assert myindex.doc_count() == 1
-        
+
         It is safe to use ``update_document`` in place of ``add_document``; if
         there is no existing document to replace, it simply does an add.
-        
+
         You cannot currently pass a list or tuple of values to a "unique"
         field.
-        
+
         Because this method has to search for documents with the same unique
         fields and delete them before adding the new document, it is slower
         than using ``add_document``.
-        
+
         * Marking more fields "unique" in the schema will make each
           ``update_document`` call slightly slower.
-        
+
         * When you are updating multiple documents, it is faster to batch
           delete all changed documents and then use ``add_document`` to add
           the replacements instead of using ``update_document``.
-        
+
         Note that this method will only replace a *committed* document;
         currently it cannot replace documents you've added to the IndexWriter
         but haven't yet committed. For example, if you do this:
-        
+
         >>> writer.update_document(unique_id=u"1", content=u"Replace me")
         >>> writer.update_document(unique_id=u"1", content=u"Replacement")
-        
+
         ...this will add two documents with the same value of ``unique_id``,
         instead of the second document replacing the first.
-        
+
         See :meth:`Writer.add_document` for information on
         ``_stored_<fieldname>``, ``_<fieldname>_boost``, and ``_boost`` keyword
         arguments.
     (i.e. the ``filedb`` writer). This object will attempt once to obtain the
     underlying writer, and if it's successful, will simply pass method calls on
     to it.
-    
+
     If this object *can't* obtain a writer immediately, it will *buffer*
     delete, add, and update method calls in memory until you call ``commit()``.
     At that point, this object will start running in a separate thread, trying
     to obtain the writer over and over, and once it obtains it, "replay" all
     the buffered method calls on it.
-    
+
     In a typical scenario where you're adding a single or a few documents to
     the index as the result of a Web transaction, this lets you just create the
     writer, add, and commit, without having to worry about index locks,
     retries, etc.
-    
+
     For example, to get an asynchronous writer, instead of this:
-    
+
     >>> writer = myindex.writer()
-    
+
     Do this:
-    
+
     >>> from whoosh.writing import AsyncWriter
     >>> writer = AsyncWriter(myindex)
     """
     """Convenience class that acts like a writer but buffers added documents to
     a :class:`~whoosh.ramindex.RamIndex` before dumping the buffered documents
     as a batch into the actual index.
-    
+
     In scenarios where you are continuously adding single documents very
     rapidly (for example a web application where lots of users are adding
     content simultaneously), using a BufferedWriter is *much* faster than
     opening and committing a writer for each document you add.
-    
+
     (This class may also be useful for batches of ``update_document`` calls. In
     a normal writer, ``update_document`` calls cannot update documents you've
     added *in that writer*. With ``BufferedWriter``, this will work.)
-    
+
     If you're adding a batches of documents at a time, you can just use a
     regular writer -- you're already committing a "batch" of documents, so you
     don't need this class.
-    
+
     To use this class, create it from your index and *keep it open*, sharing
     it between threads.
-    
+
     >>> from whoosh.writing import BufferedWriter
     >>> writer = BufferedWriter(myindex, period=120, limit=100)
-    
+
     You can control how often the ``BufferedWriter`` flushes the in-memory
     index to disk using the ``period`` and ``limit`` arguments. ``period`` is
     the maximum number of seconds between commits. ``limit`` is the maximum
     number of additions to buffer between commits.
-    
+
     You can read/search the combination of the on-disk index and the buffered
     documents in memory by calling ``BufferedWriter.reader()`` or
     ``BufferedWriter.searcher()``. This allows quasi-real-time search, where
     documents are available for searching as soon as they are buffered in
     memory, before they are committed to disk.
-    
+
     >>> searcher = writer.searcher()
-    
+
     .. tip::
         By using a searcher from the shared writer, multiple *threads* can
         search the buffered documents. Of course, other *processes* will only
         see the documents that have been written to disk. If you want indexed
         documents to become available to other processes as soon as possible,
         you have to use a traditional writer instead of a ``BufferedWriter``.
-    
+
     Calling ``commit()`` on the ``BufferedWriter`` manually commits any batched
     up changes. You can continue to make changes after calling ``commit()``,
     and you can call ``commit()`` multiple times.
-    
+
     .. note::
         This object keeps an underlying writer open and stores documents in
         memory, so you must explicitly call the :meth:`~BufferedWriter.close()`
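
One note on grouping: because ``start_group``/``end_group`` simply track nesting depth (see the counter in ``multiproc.py`` above), ``group()`` blocks can be nested to index multiple levels of hierarchy, and multiple levels can then be searched by nesting ``Nested`` queries. A hedged sketch with hypothetical ``book``/``chapter``/``para`` kinds:

    with myindex.writer() as w:
        with w.group():                    # a book...
            w.add_document(kind="book", name="Guide")
            with w.group():                # ...and a chapter inside it
                w.add_document(kind="chapter", name="Intro")
                w.add_document(kind="para", name="Able baker")

    # Books whose paragraphs match a sub-query:
    #   query.Nested(query.Term("kind", "book"),
    #                query.Nested(query.Term("kind", "chapter"), subquery))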

tests/test_nested.py

+from __future__ import with_statement
+from nose.tools import assert_equal  #@UnresolvedImport
+
+from whoosh import fields, query, scoring
+from whoosh.compat import u
+from whoosh.filedb.filestore import RamStorage
+
+
+def test_nested():
+    schema = fields.Schema(name=fields.ID(stored=True), type=fields.ID,
+                           part=fields.ID, price=fields.NUMERIC)
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        with w.group():
+            w.add_document(name=u("iPad"), type=u("product"))
+            w.add_document(part=u("screen"), price=100)
+            w.add_document(part=u("battery"), price=50)
+            w.add_document(part=u("case"), price=20)
+
+        with w.group():
+            w.add_document(name=u("iPhone"), type=u("product"))
+            w.add_document(part=u("screen"), price=60)
+            w.add_document(part=u("battery"), price=30)
+            w.add_document(part=u("case"), price=10)
+
+        with w.group():
+            w.add_document(name=u("Mac mini"), type=u("product"))
+            w.add_document(part=u("hard drive"), price=50)
+            w.add_document(part=u("case"), price=50)
+
+    with ix.searcher() as s:
+        price = s.schema["price"]
+
+        pq = query.Term("type", "product")
+        cq = query.Term("price", price.to_text(50))
+        q = query.Nested(pq, cq)
+
+        r = s.search(q)
+        assert_equal(sorted([hit["name"] for hit in r]), ["Mac mini", "iPad"])
+
+
+def test_scoring():
+    schema = fields.Schema(kind=fields.ID,
+                           name=fields.KEYWORD(scorable=True, stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Index"))
+            w.add_document(kind=u("method"), name=u("add document"))
+            w.add_document(kind=u("method"), name=u("add reader"))
+            w.add_document(kind=u("method"), name=u("close"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Accumulator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("get result"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Calculator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("add all"))
+            w.add_document(kind=u("method"), name=u("add some"))
+            w.add_document(kind=u("method"), name=u("multiply"))
+            w.add_document(kind=u("method"), name=u("close"))
+
+    with ix.searcher() as s:
+        q = query.Nested(query.Term("kind", "class"),
+                         query.Term("name", "add"))
+        r = s.search(q)
+        assert_equal([hit["name"] for hit in r], ["Calculator", "Index",
+                                                  "Accumulator"])
+
+
+def test_deletion():
+    schema = fields.Schema(kind=fields.ID,
+                           name=fields.KEYWORD(scorable=True, stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Index"))
+            w.add_document(kind=u("method"), name=u("add document"))
+            w.add_document(kind=u("method"), name=u("add reader"))
+            w.add_document(kind=u("method"), name=u("close"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Accumulator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("get result"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Calculator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("add all"))
+            w.add_document(kind=u("method"), name=u("add some"))
+            w.add_document(kind=u("method"), name=u("multiply"))
+            w.add_document(kind=u("method"), name=u("close"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Deleter"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("delete"))
+
+    with ix.searcher() as s:
+        q = query.Nested(query.Term("kind", "class"),
+                         query.Term("name", "add"))
+
+        r = s.search(q)
+        assert_equal([hit["name"] for hit in r], ["Calculator", "Index",
+                                                  "Accumulator", "Deleter"])
+
+    with ix.writer() as w:
+        w.delete_by_term("name", "Accumulator")
+        w.delete_by_term("name", "Calculator")
+
+    with ix.searcher() as s:
+        pq = query.Term("kind", "class")
+        assert_equal(len(list(pq.docs(s))), 2)
+        q = query.Nested(pq, query.Term("name", "add"))
+        r = s.search(q)
+        assert_equal([hit["name"] for hit in r], ["Index", "Deleter"])
+
+
+def test_all_parents_deleted():
+    schema = fields.Schema(kind=fields.ID,
+                           name=fields.KEYWORD(scorable=True, stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Index"))
+            w.add_document(kind=u("method"), name=u("add document"))
+            w.add_document(kind=u("method"), name=u("add reader"))
+            w.add_document(kind=u("method"), name=u("close"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Accumulator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("get result"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Calculator"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("add all"))
+            w.add_document(kind=u("method"), name=u("add some"))
+            w.add_document(kind=u("method"), name=u("multiply"))
+            w.add_document(kind=u("method"), name=u("close"))
+        with w.group():
+            w.add_document(kind=u("class"), name=u("Deleter"))
+            w.add_document(kind=u("method"), name=u("add"))
+            w.add_document(kind=u("method"), name=u("delete"))
+
+    with ix.writer() as w:
+        w.delete_by_term("name", "Index")
+        w.delete_by_term("name", "Accumulator")
+        w.delete_by_term("name", "Calculator")
+        w.delete_by_term("name", "Deleter")
+
+    with ix.searcher() as s:
+        q = query.Nested(query.Term("kind", "class"),
+                         query.Term("name", "add"))
+        r = s.search(q)
+        assert r.is_empty()
+
+
+def test_everything_is_a_parent():
+    schema = fields.Schema(id=fields.STORED, kind=fields.ID,
+                           name=fields.ID(stored=True))
+    k = u("alfa")
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(id=0, kind=k, name=u("one"))
+        w.add_document(id=1, kind=k, name=u("two"))
+        w.add_document(id=2, kind=k, name=u("three"))
+        w.add_document(id=3, kind=k, name=u("four"))
+        w.add_document(id=4, kind=k, name=u("one"))
+        w.add_document(id=5, kind=k, name=u("two"))
+        w.add_document(id=6, kind=k, name=u("three"))
+        w.add_document(id=7, kind=k, name=u("four"))
+        w.add_document(id=8, kind=k, name=u("one"))
+        w.add_document(id=9, kind=k, name=u("two"))
+        w.add_document(id=10, kind=k, name=u("three"))
+        w.add_document(id=11, kind=k, name=u("four"))
+
+    with ix.searcher() as s:
+        pq = query.Term("kind", k)
+        cq = query.Or([query.Term("name", "two"), query.Term("name", "four")])
+        q = query.Nested(pq, cq)
+        r = s.search(q)
+        assert_equal([hit["id"] for hit in r], [1, 3, 5, 7, 9, 11])
+
+
+def test_no_parents():
+    schema = fields.Schema(id=fields.STORED, kind=fields.ID,
+                           name=fields.ID(stored=True))
+    k = u("alfa")
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(id=0, kind=k, name=u("one"))
+        w.add_document(id=1, kind=k, name=u("two"))
+        w.add_document(id=2, kind=k, name=u("three"))
+        w.add_document(id=3, kind=k, name=u("four"))
+        w.add_document(id=4, kind=k, name=u("one"))
+        w.add_document(id=5, kind=k, name=u("two"))
+        w.add_document(id=6, kind=k, name=u("three"))
+        w.add_document(id=7, kind=k, name=u("four"))
+        w.add_document(id=8, kind=k, name=u("one"))
+        w.add_document(id=9, kind=k, name=u("two"))
+        w.add_document(id=10, kind=k, name=u("three"))
+        w.add_document(id=11, kind=k, name=u("four"))
+
+    with ix.searcher() as s:
+        pq = query.Term("kind", "bravo")
+        cq = query.Or([query.Term("name", "two"), query.Term("name", "four")])
+        q = query.Nested(pq, cq)
+        r = s.search(q)
+        assert r.is_empty()
+
+