Matt Chaput committed 5916d00 Merge

Merging bug fixes from default branch.

Files changed (17)

setup.cfg

+[wheel]
+universal = 1
+
 [build_sphinx]
 build-dir = docs/build
 source-dir = docs/source
 formats = zip,gztar
 
 [aliases]
-push = sdist upload
+push = sdist bdist_wheel upload
 pushdocs = build_sphinx upload_sphinx
 
 [pytest]

src/whoosh/analysis/filters.py

     """
 
     def __eq__(self, other):
-        return other and self.__class__ is other.__class__
+        return (other
+                and self.__class__ is other.__class__
+                and self.__dict__ == other.__dict__)
+
+    def __ne__(self, other):
+        return not self == other
 
     def __call__(self, tokens):
         raise NotImplementedError
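
A note on this hunk: under the old __eq__, two filters of the same class always compared equal even if configured differently, and Python 2 does not derive != from ==, so the explicit __ne__ keeps the two consistent. A minimal sketch of the new behavior (ShoutFilter is a hypothetical filter, not part of Whoosh):

    from whoosh.analysis.filters import Filter

    class ShoutFilter(Filter):
        # Hypothetical filter used only for illustration
        def __init__(self, suffix="!"):
            self.suffix = suffix

        def __call__(self, tokens):
            for t in tokens:
                t.text += self.suffix
                yield t

    assert ShoutFilter("!") == ShoutFilter("!")    # same class, same __dict__
    assert ShoutFilter("!") != ShoutFilter("!!")   # now unequal, and != agrees with ==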

src/whoosh/collectors.py

         if use_alarm:
             import signal
             self.use_alarm = use_alarm and hasattr(signal, "SIGALRM")
+        else:
+            self.use_alarm = False
 
         self.timer = None
         self.timedout = False

src/whoosh/columns.py

 
         return self.Reader(dbfile, basepos, length, doccount)
 
-    def default_value(self):
+    def default_value(self, reverse=False):
         """Returns the default value for this column type.
         """
 
     def __getitem__(self, docnum):
         raise NotImplementedError
 
-    def sort_key(self, docnum, reverse=False):
+    def sort_key(self, docnum):
         return self[docnum]
 
     def __iter__(self):
     def load(self):
         return list(self)
 
+    def set_reverse(self):
+        raise NotImplementedError
+
 
 # Arbitrary bytes column
 
         return self.Reader(dbfile, basepos, length, doccount, self._typecode,
                            self._default)
 
+    def default_value(self, reverse=False):
+        v = self._default
+        if reverse:
+            v = 0 - v
+        return v
+
     class Writer(FixedBytesColumn.Writer):
         def __init__(self, dbfile, typecode, default):
             self._dbfile = dbfile
             self._basepos = basepos
             self._doccount = doccount
             self._default = default
+            self._reverse = False
 
             self._typecode = typecode
             self._unpack = struct.Struct("!" + typecode).unpack
             s = FixedBytesColumn.Reader.__getitem__(self, docnum)
             return self._unpack(s)[0]
 
-        def sort_key(self, docnum, reverse=False):
+        def sort_key(self, docnum):
             key = self[docnum]
-            if reverse:
+            if self._reverse:
                 key = 0 - key
             return key
 
             else:
                 return array(self._typecode, self)
 
+        def set_reverse(self):
+            self._reverse = True
+
 
 # Column of boolean values
 
     def writer(self, dbfile):
         return self.Writer(dbfile, self._compressat)
 
+    def default_value(self, reverse=False):
+        return self._default ^ reverse
+
     class Writer(ColumnWriter):
         def __init__(self, dbfile, compressat):
             self._dbfile = dbfile
             self._basepos = basepos
             self._length = length
             self._doccount = doccount
+            self._reverse = False
 
             compressed = dbfile.get_byte(basepos + (length - 1))
             if compressed:
         def __getitem__(self, i):
             return i in self._bitset
 
-        def sort_key(self, docnum, reverse=False):
-            return int(self[docnum] ^ reverse)
+        def sort_key(self, docnum):
+            return int(self[docnum] ^ self._reverse)
 
         def __iter__(self):
             i = 0
                 self._bitset = BitSet.from_bytes(bs)
             return self
 
+        def set_reverse(self):
+            self._reverse = True
+
 
 # Compressed variants
 
     def __getitem__(self, docnum):
         return self._translate(self._reader[docnum])
 
-    def sort_key(self, docnum, reverse=False):
-        return self._reader.sort_key(docnum, reverse=reverse)
+    def sort_key(self, docnum):
+        return self._reader.sort_key(docnum)
 
     def __iter__(self):
         translate = self._translate
         return (translate(v) for v in self._reader)
 
+    def set_reverse(self):
+        self._reader.set_reverse()
+
 
 # Column wrappers
 
     def __getitem__(self, docnum):
         return self._child[docnum]
 
-    def sort_key(self, docnum, reverse=False):
-        return self._child.sort_key(docnum, reverse=reverse)
+    def sort_key(self, docnum):
+        return self._child.sort_key(docnum)
 
     def __iter__(self):
         return iter(self._child)
     def load(self):
         return list(self)
 
+    def set_reverse(self):
+        self._child.set_reverse()
+
 
 class ClampedNumericColumn(WrappedColumn):
     """An experimental wrapper type for NumericColumn that clamps out-of-range
 
 
 class ListColumnReader(ColumnReader):
-    def sort_key(self, docnum, reverse=False):
+    def sort_key(self, docnum):
         return self[docnum][0]
 
     def __iter__(self):
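
The net effect in this file: reverse is no longer a per-call argument to sort_key(); it is reader state set once via set_reverse(), and default_value(reverse=...) supplies a default that sorts consistently in a descending ordering. A minimal sketch (assuming NumericColumn's (typecode, default) constructor; the values are arbitrary):

    from whoosh import columns

    col = columns.NumericColumn("i", default=5)
    print(col.default_value())              # 5
    print(col.default_value(reverse=True))  # -5, per the negation above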

src/whoosh/fields.py

         # Raise an error if the user tries to use a type other than int or
         # float
         if numtype is Decimal:
-            raise TypeError("To store Decimal instances, set type to int use "
-                            "the decimal_places argument")
+            numtype = int
+            if not decimal_places:
+                raise TypeError("To store Decimal instances, you must set the "
+                                "decimal_places argument")
         elif numtype not in (int, float):
             raise TypeError("Can't use %r as a type, use int or float"
                             % numtype)
         dc = self.decimal_places
         if dc and isinstance(x, (string_type, Decimal)):
             x = Decimal(x) * (10 ** dc)
+        elif isinstance(x, Decimal):
+            raise TypeError("Can't index a Decimal object unless you specified "
+                            "decimal_places on the field")
 
         try:
             x = self.numtype(x)
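
The practical upshot, mirrored by test_index_decimals below: NUMERIC(Decimal) is now coerced to int storage but requires decimal_places, and passing a Decimal to a field without decimal_places raises TypeError. A short sketch:

    from decimal import Decimal
    from whoosh import fields

    f = fields.NUMERIC(Decimal, decimal_places=5)  # stored as scaled ints
    try:
        fields.NUMERIC(Decimal)                    # no decimal_places given
    except TypeError as e:
        print(e)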

src/whoosh/qparser/common.py

 
 def attach(q, stxnode):
     if q:
-        q.startchar = stxnode.startchar
-        q.endchar = stxnode.endchar
+        try:
+            q.startchar = stxnode.startchar
+            q.endchar = stxnode.endchar
+        except AttributeError:
+            raise AttributeError("Can't set attribute on %s"
+                                 % q.__class__.__name__)
     return q
 
 

src/whoosh/reading.py

     def has_column(self, fieldname):
         return False
 
-    def column_reader(self, fieldname):
+    def column_reader(self, fieldname, column=None, reverse=False,
+                      translate=False):
+        """
+
+        :param fieldname: the name of the field for which to get a reader.
+        :param column: if passed, use this Column object instead of the one
+            associated with the field in the Schema.
+        :param reverse: if passed, reverses the order of keys returned by the
+            reader's ``sort_key()`` method. If the column type is not
+            reversible, this will raise a ``NotImplementedError``.
+        :param translate: if True, wrap the reader to call the field's
+            ``from_bytes()`` method on the returned values.
+        :return: a :class:`whoosh.columns.ColumnReader` object.
+        """
+
         raise NotImplementedError
 
 
         coltype = self.schema[fieldname].column_type
         return coltype and self._perdoc.has_column(fieldname)
 
-    def column_reader(self, fieldname, column=None, translate=True):
+    def column_reader(self, fieldname, column=None, reverse=False,
+                      translate=True):
         if self.is_closed:
             raise ReaderClosed
+
         fieldobj = self.schema[fieldname]
-        if not self.has_column(fieldname):
-            raise Exception("No column for field %r" % fieldname)
+        column = column or fieldobj.column_type
+        if not column:
+            raise Exception("No column for field %r in %r"
+                            % (fieldname, self))
 
-        ctype = column or fieldobj.column_type
-        creader = self._perdoc.column_reader(fieldname, ctype)
+        if self._perdoc.has_column(fieldname):
+            creader = self._perdoc.column_reader(fieldname, column)
+            if reverse:
+                creader.set_reverse()
+        else:
+            # This segment doesn't have a column file for this field, so create
+            # a fake column reader that always returns the default value.
+            default = column.default_value(reverse)
+            creader = columns.EmptyColumnReader(default, self.doc_count_all())
+
         if translate:
             # Wrap the column in a Translator to give the caller
             # nice values instead of sortable representations
     def has_column(self, fieldname):
         return any(r.has_column(fieldname) for r in self.readers)
 
-    def column_reader(self, fieldname, translate=True):
-        column = self.schema[fieldname].column_type
+    def column_reader(self, fieldname, column=None, reverse=False,
+                      translate=True):
+        column = column or self.schema[fieldname].column_type
         if not column:
             raise Exception("Field %r has no column type" % (fieldname,))
 
-        default = column.default_value()
-        doccount = self.doc_count_all()
-
         creaders = []
         for r in self.readers:
-            if r.has_column(fieldname):
-                creaders.append(r.column_reader(fieldname, translate=translate))
-            else:
-                creaders.append(columns.EmptyColumnReader(default, doccount))
-
+            cr = r.column_reader(fieldname, column=column, reverse=reverse,
+                                 translate=translate)
+            creaders.append(cr)
         return columns.MultiColumnReader(creaders)
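
Taken together, the caller's view of the new signature looks like this sketch (the field name is made up; it builds a throwaway index):

    from whoosh import fields
    from whoosh.filedb.filestore import RamStorage

    schema = fields.Schema(age=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(age=42)

    r = ix.reader()
    # translate=False returns raw sortable values; reverse=True makes
    # sort_key() produce keys for a descending ordering
    creader = r.column_reader("age", reverse=True, translate=False)
    print(creader.sort_key(0))
    r.close()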

src/whoosh/redline.py

+import os.path
+import re
+import struct
+from bisect import bisect_left, bisect_right, insort
+from itertools import chain
+
+from whoosh.compat import next, xrange, iteritems
+from whoosh.compat import load, dump
+
+
+itemheader = struct.Struct("<Hi")
+ushort_struct = struct.Struct("<H")
+int_struct = struct.Struct("<i")
+uint_struct = struct.Struct("<I")
+long_struct = struct.Struct("<q")
+pack_ushort, unpack_ushort = ushort_struct.pack, ushort_struct.unpack
+pack_int, unpack_int = int_struct.pack, int_struct.unpack
+pack_uint, unpack_uint = uint_struct.pack, uint_struct.unpack
+pack_long, unpack_long = long_struct.pack, long_struct.unpack
+
+
+class Region(object):
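+    """Describes one contiguous run of key/value pairs on disk: its start
+    and end offsets in the file, the minimum and maximum keys it contains,
+    and the number of items in the run.
+    """
+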
+    def __init__(self, start, end, minkey, maxkey, length):
+        self.start = start
+        self.end = end
+        self.minkey = minkey
+        self.maxkey = maxkey
+        self.length = length
+
+    def __repr__(self):
+        return "<%s %r-%r>" % (self.__class__.__name__,
+                               self.minkey, self.maxkey)
+
+
+class RegionReader(object):
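+    """Provides random access to the values in a single Region through a
+    memory map of the underlying file. Call load() to build the
+    key-to-(position, length) index before using __getitem__.
+    """
+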
+    def __init__(self, dbfile, mm, region):
+        self._dbfile = dbfile
+        self._mm = mm
+        self._region = region
+
+        self._start = region.start
+        self._end = region.end
+        self.minkey = region.minkey
+        self.maxkey = region.maxkey
+        self._length = region.length
+        self.loaded = False
+
+        self._poses = None
+        self._index = None
+
+    def load(self):
+        f = self._dbfile
+        _read = f.read
+        _unpack = itemheader.unpack
+        _headersize = itemheader.size
+
+        # Map each key to the position and length of its value in the file
+        self._poses = {}
+        pos = self._start
+        f.seek(pos)
+        for i in xrange(self._length):
+            keylen, vlen = _unpack(_read(_headersize))
+            pos += _headersize
+            key = _read(keylen)
+            pos += keylen
+
+            self._poses[key] = (pos, vlen)
+            pos += vlen
+            # Skip past the value to the next item header
+            f.seek(pos)
+
+        assert f.tell() == pos == self._end
+        self.loaded = True
+
+    def __getitem__(self, key):
+        pos, vlen = self._poses[key]
+        return self._mm[pos:pos + vlen]
+
+
+def write_regions(dbfile, items, maxsize):
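+    # Write the (key, value) pairs to dbfile in order, cutting over to a new
+    # region whenever the accumulated byte size reaches maxsize, and yield a
+    # Region object describing each completed run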
+    _write = dbfile.write
+    _pack = itemheader.pack
+    _headersize = itemheader.size
+
+    start = dbfile.tell()
+    minkey = None
+    size = 0
+    length = 0
+
+    key = None
+    for key, value in items:
+        if minkey is None:
+            minkey = key
+
+        _write(_pack(len(key), len(value)) + key + value)
+        size += _headersize + len(key) + len(value)
+        length += 1
+
+        if size >= maxsize:
+            end = dbfile.tell()
+            reg = Region(start, end, minkey, key, length)
+            yield reg
+
+            size = 0
+            length = 0
+            minkey = None
+            start = end
+
+    if length:
+        assert minkey is not None and key is not None
+        reg = Region(start, dbfile.tell(), minkey, key, length)
+        yield reg
+
+
+def read_region(dbfile, region, start=None):
+    _read = dbfile.read
+    _unpack = itemheader.unpack
+    _headersize = itemheader.size
+
+    start = start if start is not None else region.start
+    dbfile.seek(start)
+
+    first = True
+    for i in xrange(region.length):
+        keylen, vlen = _unpack(_read(_headersize))
+        key = _read(keylen)
+        val = _read(vlen)
+
+        if first:
+            assert key == region.minkey
+            first = False
+
+        yield key, val
+
+    assert dbfile.tell() == region.end
+
+
+def bisect_regions(regions, key):
+    # Find the index of the region that would contain the given key
+
+    lo = 0
+    hi = len(regions)
+    while lo < hi:
+        mid = (lo + hi) // 2
+        region = regions[mid]
+
+        if region.minkey <= key <= region.maxkey:
+            return mid
+        elif region.maxkey < key:
+            lo = mid + 1
+        else:
+            hi = mid
+
+    return lo
+
+
+def segment_keys(regions, keys):
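+    # Partition a sorted list of keys into (sublist, region) pairs, matching
+    # each run of keys with the existing Region whose range covers it, or
+    # with None where no region overlaps; matched regions are popped from
+    # the regions list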
+    if not keys:
+        return
+
+    k1 = keys[0]
+    kn = keys[-1]
+
+    if not regions or k1 > regions[-1].maxkey or kn < regions[0].minkey:
+        return [(keys, None)]
+
+    new = []
+    left = 0
+    r = bisect_regions(regions, k1)
+
+    while left < len(keys) and r < len(regions):
+        leftkey = keys[left]
+        region = regions[r]
+
+        if leftkey > region.maxkey:
+            r += 1
+        elif leftkey < region.minkey:
+            right = bisect_left(keys, region.minkey, left)
+            new.append((keys[left:right], None))
+            left = right
+        else:
+            right = bisect_right(keys, region.maxkey, left)
+            new.append((keys[left:right], region))
+            left = right
+            regions.pop(r)
+
+    if left < len(keys):
+        new.append((keys[left:], None))
+
+    return new
+
+
+def merge_items(olditems, newitems):
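+    # Merge two sorted (key, value) sequences into one sorted stream;
+    # pairs in newitems take precedence over olditems, and a value of None
+    # is a tombstone that deletes the corresponding old item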
+    i = 0
+    _len = len(newitems)
+    for item in olditems:
+        key = item[0]
+
+        # Yield any items in newitems that come before the current key
+        # in the iterator
+        while i < _len and newitems[i][0] < key:
+            yield newitems[i]
+            i += 1
+
+        # newitems override olditems
+        if i < _len and newitems[i][0] == key:
+            item = newitems[i]
+            i += 1
+
+            # If the value is a tombstone, swallow the item
+            if item[1] is None:
+                continue
+
+        yield item
+
+    if i < _len:
+        for item in newitems[i:]:
+            yield item
+

src/whoosh/searching.py

         if isinstance(obj, (set, DocIdSet)):
             c = obj
         elif isinstance(obj, Results):
-            c = obj.docset
+            c = obj.docs()
         elif isinstance(obj, ResultsPage):
-            c = obj.results.docset
+            c = obj.results.docs()
         elif isinstance(obj, query.Query):
             c = self._query_to_comb(obj)
         else:

src/whoosh/sorting.py

     def __init__(self, global_searcher, fieldname, reverse=False):
         self._fieldname = fieldname
         self._fieldobj = global_searcher.schema[self._fieldname]
+        self._column_type = self._fieldobj.column_type
         self._reverse = reverse
 
+        # The column reader is set in set_searcher() as we iterate over the
+        # sub-searchers
+        self._creader = None
+
     def __repr__(self):
         return "%s(%r, %r, reverse=%r)" % (self.__class__.__name__,
                                            self._fieldobj, self._fieldname,
 
     def set_searcher(self, segment_searcher, docoffset):
         r = segment_searcher.reader()
-        self._creader = r.column_reader(self._fieldname, translate=False)
+        self._creader = r.column_reader(self._fieldname,
+                                        reverse=self._reverse,
+                                        translate=False)
 
     def key_for(self, matcher, segment_docnum):
-        return self._creader.sort_key(segment_docnum, self._reverse)
+        return self._creader.sort_key(segment_docnum)
 
     def key_to_name(self, key):
         return self._fieldobj.from_column_value(key)
         self._use_column = (reader.has_column(fieldname)
                             and field.column_type.stores_lists())
 
+        # These are set in set_searcher() as we iterate over the sub-searchers
+        self._segment_searcher = None
+        self._creader = None
+        self._lists = None
+
     def set_searcher(self, segment_searcher, docoffset):
         fieldname = self._fieldname
         self._segment_searcher = segment_searcher
                 self._add(item)
         self.maptype = maptype
 
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__,
+                               self.facets,
+                               self.maptype)
+
     @classmethod
     def from_sortedby(cls, sortedby):
         multi = cls()

tests/test_indexing.py

             assert paths == ["/a", "/b"]
 
 
+def test_index_decimals():
+    from decimal import Decimal
+
+    schema = fields.Schema(name=fields.KEYWORD(stored=True),
+                           num=fields.NUMERIC(int))
+    ix = RamStorage().create_index(schema)
+
+    with ix.writer() as w:
+        with pytest.raises(TypeError):
+            w.add_document(name=u("hello"), num=Decimal("3.2"))
+
+    schema = fields.Schema(name=fields.KEYWORD(stored=True),
+                           num=fields.NUMERIC(Decimal, decimal_places=5))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(name=u("hello"), num=Decimal("3.2"))
+
+
+

tests/test_keyval.py

+from __future__ import with_statement
+
+import os.path
+import random
+
+import pytest
+
+from whoosh import redline as kv
+from whoosh.compat import b, xrange
+from whoosh.util import now, random_name
+from whoosh.util.testing import TempDir
+
+
+def test_bisect_regions():
+    regions = [kv.Region(0, 0, "b", "d", 0),
+               kv.Region(0, 0, "f", "h", 0),
+               kv.Region(0, 0, "j", "m", 0)]
+
+    assert kv.bisect_regions(regions, "a") == 0
+    assert kv.bisect_regions(regions, "b") == 0
+    assert kv.bisect_regions(regions, "c") == 0
+    assert kv.bisect_regions(regions, "d") == 0
+    assert kv.bisect_regions(regions, "e") == 1
+    assert kv.bisect_regions(regions, "f") == 1
+    assert kv.bisect_regions(regions, "i") == 2
+    assert kv.bisect_regions(regions, "j") == 2
+    assert kv.bisect_regions(regions, "m") == 2
+    assert kv.bisect_regions(regions, "n") == 3
+    assert kv.bisect_regions(regions, "z") == 3
+
+
+def test_segments():
+    r1 = kv.Region(0, 0, "b", "d", 0)
+    r2 = kv.Region(0, 0, "f", "h", 0)
+    r3 = kv.Region(0, 0, "j", "m", 0)
+
+    regions = [r1, r2, r3]
+
+    output = kv.segment_keys(regions, "abcdefghijklmnop")
+    assert output == [
+        ("a", None),
+        ("bcd", r1),
+        ("e", None),
+        ("fgh", r2),
+        ("i", None),
+        ("jklm", r3),
+        ("nop", None)
+    ]
+
+
+def test_write_read():
+    items = [
+        (b("alfa"), b("bravo")),
+        (b("charlie"), b("delta")),
+        (b("echo"), b("foxtrot")),
+        (b("golf"), b("hotel")),
+        (b("india"), b("juliet")),
+        (b("kilo"), b("lima")),
+        (b("mike"), b("november")),
+        (b("oskar"), b("papa")),
+        (b("quebec"), b("romeo")),
+    ]
+
+    with TempDir("kvwriteread") as dirpath:
+        path = os.path.join(dirpath, "test")
+        with open(path, "wb") as f:
+            regions = list(kv.write_regions(f, items, 4096))
+        assert len(regions) == 1
+
+        with open(path, "rb") as f:
+            readitems = list(kv.read_region(f, regions[0]))
+        assert readitems == items
+
+
+def test_merge_items():
+    items1 = [("c", "d"), ("e", "f"), ("g", "h"), ("i", "j"), ("o", "p")]
+    items2 = [("_", ":"), ("a", "b"), ("e", None), ("i", "k"), ("m", "n")]
+
+    target = [
+        ("_", ":"), ("a", "b"), ("c", "d"), ("g", "h"), ("i", "k"), ("m", "n"),
+        ("o", "p")
+    ]
+
+    output = list(kv.merge_items(items1, items2))
+    assert output == target
+
+
+def test_merge_random():
+    items1 = sorted((random_name(4), random_name(8)) for _ in xrange(500))
+    items2 = sorted((random_name(4), random_name(8)) for _ in xrange(500))
+
+    x1 = sorted(dict(items1 + items2).items())
+    x2 = list(kv.merge_items(items1, items2))
+    assert x1 == x2

tests/test_mpwriter.py

         with ix.writer(procs=4, batchsize=10) as w:
             for i in xrange(10):
                 w.add_document(a=u(str(i)))
+
+
+def test_finish_segment():
+    check_multi()
+
+    from whoosh.multiproc import MpWriter
+
+    schema = fields.Schema(a=fields.KEYWORD(stored=True))
+    with TempIndex(schema) as ix:
+        w = MpWriter(ix, procs=2, batchsize=1, multisegment=False,
+                     limitmb=0.00001)
+
+        for i in range(9):
+            w.add_document(a=u(chr(65 + i) * 50))
+
+        w.commit()

tests/test_parsing.py

     qp = default.QueryParser("f", schema)
     _ = qp.parse(u("Dahmen ANDMAYBE @year:[2000 TO]"))
 
+
+def test_quoted_prefix():
+    qp = default.QueryParser("f", None)
+
+    expr = r"(^|(?<=[ (]))(?P<text>\w+|[*]):"
+    qp.replace_plugin(plugins.FieldsPlugin(expr))
+
+    q = qp.parse(u('foo url:http://apple.com:8080/bar* baz'))
+    assert isinstance(q, query.And)
+    assert q[0] == query.Term("f", "foo")
+    assert q[1] == query.Prefix("url", "http://apple.com:8080/bar")
+    assert q[2] == query.Term("f", "baz")
+    assert len(q) == 3

tests/test_results.py

 from whoosh.codec.whoosh3 import W3Codec
 from whoosh.compat import u, xrange, text_type, permutations
 from whoosh.filedb.filestore import RamStorage
-from whoosh.util.testing import TempStorage
+from whoosh.util.testing import TempStorage, TempIndex
 
 
 def test_score_retrieval():
         hit = r[0]
         assert hit["content"] == "bravo"
         assert hit.highlights("content") == ""
+
+
+def test_filter_by_result():
+    schema = fields.Schema(title=fields.TEXT(stored=True),
+                           content=fields.TEXT(stored=True))
+
+    with TempIndex(schema, "filter") as ix:
+        words = u("foo bar baz qux barney").split()
+        with ix.writer() as w:
+            for x in xrange(100):
+                t = u("even" if x % 2 == 0 else "odd")
+                c = words[x % len(words)]
+                w.add_document(title=t, content=c)
+
+        with ix.searcher() as searcher:
+            fq = query.Term("title", "even")
+            filter_result = searcher.search(fq)
+            assert filter_result.docset is None
+
+            q = query.Term("content", "foo")
+
+            # filter_result.docs()
+            result = searcher.search(q, filter=filter_result)
+            assert all(x["title"] == "even" and x["content"] == "foo"
+                       for x in result)
+

tests/test_searching.py

         assert r[0]["name"] == u("close")
 
 
+def test_find_decimals():
+    from decimal import Decimal
+
+    schema = fields.Schema(name=fields.KEYWORD(stored=True),
+                           num=fields.NUMERIC(Decimal, decimal_places=5))
+    ix = RamStorage().create_index(schema)
+
+    with ix.writer() as w:
+        w.add_document(name=u("alfa"), num=Decimal("1.5"))
+        w.add_document(name=u("bravo"), num=Decimal("2.1"))
+        w.add_document(name=u("charlie"), num=Decimal("5.3"))
+        w.add_document(name=u("delta"), num=Decimal(3))
+        w.add_document(name=u("echo"), num=Decimal("3.00001"))
+        w.add_document(name=u("foxtrot"), num=Decimal("3"))
+
+    qp = qparser.QueryParser("name", ix.schema)
+    q = qp.parse("num:3.0")
+    assert isinstance(q, query.Term)
+
+    with ix.searcher() as s:
+        r = s.search(q)
+        names = " ".join(sorted(hit["name"] for hit in r))
+        assert names == "delta foxtrot"
+
+

tests/test_sorting.py

         assert chapr[0] == "alfa"
         assert pricer[0] == 100
 
+
+def test_missing_column():
+    from whoosh import collectors
+
+    schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(id=0, tags=u("alfa bravo charlie"))
+        w.add_document(id=1, tags=u("bravo charlie delta"))
+        w.add_document(id=2, tags=u("charlie delta echo"))
+        w.merge = False
+
+    with ix.writer() as w:
+        w.add_field("age", fields.NUMERIC(sortable=True))
+
+        w.add_document(id=3, tags=u("delta echo foxtrot"), age=10)
+        w.add_document(id=4, tags=u("echo foxtrot golf"), age=5)
+        w.add_document(id=5, tags=u("foxtrot golf alfa"), age=20)
+        w.merge = False
+
+    with ix.writer() as w:
+        w.add_document(id=6, tags=u("golf alfa bravo"), age=2)
+        w.add_document(id=7, tags=u("alfa hotel india"), age=50)
+        w.add_document(id=8, tags=u("hotel india bravo"), age=15)
+        w.merge = False
+
+    with ix.searcher() as s:
+        assert not s.is_atomic()
+
+        q = query.Term("tags", u("alfa"))
+
+        # Have to use yucky low-level collector API to make sure we used a
+        # ColumnCategorizer to do the sorting
+        c = s.collector(sortedby="age")
+        assert isinstance(c, collectors.SortingCollector)
+        s.search_with_collector(q, c)
+        assert isinstance(c.categorizer, sorting.ColumnCategorizer)
+
+        r = c.results()
+        assert [hit["id"] for hit in r] == [6, 5, 7, 0]
+
+        r = s.search(q, sortedby="age", reverse=True)
+        assert [hit["id"] for hit in r] == [0, 7, 5, 6]
+
+
+def test_compound_sort():
+    fspec = fields.KEYWORD(stored=True, sortable=True)
+    schema = fields.Schema(a=fspec, b=fspec, c=fspec)
+    ix = RamStorage().create_index(schema)
+
+    alist = u("alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo").split()
+    blist = u("alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa").split()
+    clist = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet").split()
+    assert all(len(ls) == 10 for ls in (alist, blist, clist))
+
+    with ix.writer() as w:
+        for i in xrange(10):
+            w.add_document(a=alist[i], b=blist[i], c=clist[i])
+
+    with ix.searcher() as s:
+        q = query.Every()
+        sortedby = [sorting.FieldFacet("a"),
+                    sorting.FieldFacet("b", reverse=True),
+                    sorting.FieldFacet("c")]
+
+        r = s.search(q, sortedby=sortedby)
+        output = []
+        for hit in r:
+            output.append(" ".join((hit["a"], hit["b"], hit["c"])))
+
+        assert output == [
+            "alfa charlie charlie",
+            "alfa charlie india",
+            "alfa bravo echo",
+            "alfa alfa alfa",
+            "alfa alfa golf",
+            "bravo charlie foxtrot",
+            "bravo bravo bravo",
+            "bravo bravo hotel",
+            "bravo alfa delta",
+            "bravo alfa juliet",
+        ]
+