Commits

Matt Chaput committed 32c2616

Added __ne__ method to Filter.
Fixes issue #374.

Comments (0)

Files changed (5)

src/whoosh/analysis/filters.py

     """
 
     def __eq__(self, other):
-        return other and self.__class__ is other.__class__
+        return (other
+                and self.__class__ is other.__class__
+                and self.__dict__ == other.__dict__)
+
+    def __ne__(self, other):
+        return not self == other
 
     def __call__(self, tokens):
         raise NotImplementedError

src/whoosh/qparser/common.py

 
 def attach(q, stxnode):
     if q:
-        q.startchar = stxnode.startchar
-        q.endchar = stxnode.endchar
+        try:
+            q.startchar = stxnode.startchar
+            q.endchar = stxnode.endchar
+        except AttributeError:
+            raise AttributeError("Can't set attribute on %s"
+                                 % q.__class__.__name__)
     return q
 
 

src/whoosh/redline.py

+import os.path
+import re
+import struct
+from bisect import bisect_left, bisect_right, insort
+from itertools import chain
+
+from whoosh.compat import next, xrange, iteritems
+from whoosh.compat import load, dump
+
+
+itemheader = struct.Struct("<Hi")
+ushort_struct = struct.Struct("<H")
+int_struct = struct.Struct("<i")
+uint_struct = struct.Struct("<I")
+long_struct = struct.Struct("<q")
+pack_ushort, unpack_ushort = ushort_struct.pack, ushort_struct.unpack
+pack_int, unpack_int = int_struct.pack, int_struct.unpack
+pack_uint, unpack_uint = uint_struct.pack, uint_struct.unpack
+pack_long, unpack_long = long_struct.pack, long_struct.unpack
+
+
+class Region(object):
+    def __init__(self, start, end, minkey, maxkey, length):
+        self.start = start
+        self.end = end
+        self.minkey = minkey
+        self.maxkey = maxkey
+        self.length = length
+
+    def __repr__(self):
+        return "<%s %r-%r>" % (self.__class__.__name__,
+                               self.minkey, self.maxkey)
+
+
+class RegionReader(object):
+    def __init__(self, dbfile, mm, region):
+        self._dbfile = dbfile
+        self._mm = mm
+        self._region = region
+
+        self._start = region.start
+        self._end = region.end
+        self.minkey = region.minkey
+        self.maxkey = region.maxkey
+        self._length = region.length
+        self.loaded = False
+
+        self._poses = None
+        self._index = None
+
+    def load(self):
+        f = self._dbfile
+        _read = f.read
+        _unpack = itemheader.unpack
+        _headersize = itemheader.size
+
+        pos = self._start
+        f.seek(pos)
+        for i in xrange(self._length):
+            keylen, vlen = _unpack(_read(_headersize))
+            pos += _headersize
+            key = _read(keylen)
+            pos += keylen
+
+            self._poses[key] = pos
+            pos += vlen
+
+        assert f.tell() == pos == self._end
+        self.loaded = True
+
+    def __getitem__(self, key):
+        pos = self._poses[key]
+        return self._mm[pos]
+
+
+def write_regions(dbfile, items, maxsize):
+    _write = dbfile.write
+    _pack = itemheader.pack
+    _headersize = itemheader.size
+
+    start = dbfile.tell()
+    minkey = None
+    size = 0
+    length = 0
+
+    key = None
+    for key, value in items:
+        if minkey is None:
+            minkey = key
+
+        _write(_pack(len(key), len(value)) + key + value)
+        size += _headersize + len(key) + len(value)
+        length += 1
+
+        if size >= maxsize:
+            end = dbfile.tell()
+            reg = Region(start, end, minkey, key, length)
+            yield reg
+
+            size = 0
+            length = 0
+            minkey = None
+            start = end
+
+    if length:
+        assert minkey is not None and key is not None
+        reg = Region(start, dbfile.tell(), minkey, key, length)
+        yield reg
+
+
+def read_region(dbfile, region, start=None):
+    _read = dbfile.read
+    _unpack = itemheader.unpack
+    _headersize = itemheader.size
+
+    start = start if start is not None else region.start
+    dbfile.seek(start)
+
+    first = True
+    for i in xrange(region.length):
+        keylen, vlen = _unpack(_read(_headersize))
+        key = _read(keylen)
+        val = _read(vlen)
+
+        if first:
+            assert key == region.minkey
+            first = False
+
+        yield key, val
+
+    assert dbfile.tell() == region.end
+
+
+def bisect_regions(regions, key):
+    # Find the index of the region that would contain the given key
+
+    lo = 0
+    hi = len(regions)
+    while lo < hi:
+        mid = (lo + hi) // 2
+        region = regions[mid]
+
+        if region.minkey <= key <= region.maxkey:
+            return mid
+        elif region.maxkey < key:
+            lo = mid + 1
+        else:
+            hi = mid
+
+    return lo
+
+
+def segment_keys(regions, keys):
+    if not keys:
+        return
+
+    k1 = keys[0]
+    kn = keys[-1]
+
+    if not regions or k1 > regions[-1].maxkey or kn < regions[0].minkey:
+        return [(keys, None)]
+
+    new = []
+    left = 0
+    r = bisect_regions(regions, k1)
+
+    while left < len(keys) and r < len(regions):
+        leftkey = keys[left]
+        region = regions[r]
+
+        if leftkey > region.maxkey:
+            r += 1
+        elif leftkey < region.minkey:
+            right = bisect_left(keys, region.minkey, left)
+            new.append((keys[left:right], None))
+            left = right
+        else:
+            right = bisect_right(keys, region.maxkey, left)
+            new.append((keys[left:right], region))
+            left = right
+            regions.pop(r)
+
+    if left < len(keys):
+        new.append((keys[left:], None))
+
+    return new
+
+
+def merge_items(olditems, newitems):
+    i = 0
+    _len = len(newitems)
+    for item in olditems:
+        key = item[0]
+
+        # Yield any items in newitems that come before the current key
+        # in the iterator
+        while i < _len and newitems[i][0] < key:
+            yield newitems[i]
+            i += 1
+
+        # newitems override olditems
+        if i < _len and newitems[i][0] == key:
+            item = newitems[i]
+            i += 1
+
+            # If the value is a tombstone, swallow the item
+            if item[1] is None:
+                continue
+
+        yield item
+
+    if i < _len:
+        for item in newitems[i:]:
+            yield item
+
+
+
+
+

tests/test_keyval.py

+from __future__ import with_statement
+
+import os.path
+import random
+
+import pytest
+
+from whoosh import redline as kv
+from whoosh.compat import b, xrange
+from whoosh.util import now, random_name
+from whoosh.util.testing import TempDir
+
+
+def test_bisect_regions():
+    regions = [kv.Region(0, 0, "b", "d", 0),
+               kv.Region(0, 0, "f", "h", 0),
+               kv.Region(0, 0, "j", "m", 0)]
+
+    assert kv.bisect_regions(regions, "a") == 0
+    assert kv.bisect_regions(regions, "b") == 0
+    assert kv.bisect_regions(regions, "c") == 0
+    assert kv.bisect_regions(regions, "d") == 0
+    assert kv.bisect_regions(regions, "e") == 1
+    assert kv.bisect_regions(regions, "f") == 1
+    assert kv.bisect_regions(regions, "i") == 2
+    assert kv.bisect_regions(regions, "j") == 2
+    assert kv.bisect_regions(regions, "m") == 2
+    assert kv.bisect_regions(regions, "n") == 3
+    assert kv.bisect_regions(regions, "z") == 3
+
+
+def test_segments():
+    r1 = kv.Region(0, 0, "b", "d", 0)
+    r2 = kv.Region(0, 0, "f", "h", 0)
+    r3 = kv.Region(0, 0, "j", "m", 0)
+
+    regions = [r1, r2, r3]
+
+    output = kv.segment_keys(regions, "abcdefghijklmnop")
+    assert output == [
+        ("a", None),
+        ("bcd", r1),
+        ("e", None),
+        ("fgh", r2),
+        ("i", None),
+        ("jklm", r3),
+        ("nop", None)
+    ]
+
+
+def test_write_read():
+    items = [
+        (b("alfa"), b("bravo")),
+        (b("charlie"), b("delta")),
+        (b("echo"), b("foxtrot")),
+        (b("golf"), b("hotel")),
+        (b("india"), b("juliet")),
+        (b("kilo"), b("lima")),
+        (b("mike"), b("november")),
+        (b("oskar"), b("papa")),
+        (b("quebec"), b("romeo")),
+    ]
+
+    with TempDir("kvwriteread") as dirpath:
+        path = os.path.join(dirpath, "test")
+        with open(path, "wb") as f:
+            regions = list(kv.write_regions(f, items, 4096))
+        assert len(regions) == 1
+
+        with open(path, "rb") as f:
+            readitems = list(kv.read_region(f, regions[0]))
+        assert readitems == items
+
+
+def test_merge_items():
+    items1 = [("c", "d"), ("e", "f"), ("g", "h"), ("i", "j"), ("o", "p")]
+    items2 = [("_", ":"), ("a", "b"), ("e", None), ("i", "k"), ("m", "n")]
+
+    target = [
+        ("_", ":"), ("a", "b"), ("c", "d"), ("g", "h"), ("i", "k"), ("m", "n"),
+        ("o", "p")
+    ]
+
+    output = list(kv.merge_items(items1, items2))
+    assert output == target
+
+
+def test_merge_random():
+    items1 = sorted((random_name(4), random_name(8)) for _ in xrange(500))
+    items2 = sorted((random_name(4), random_name(8)) for _ in xrange(500))
+
+    x1 = sorted(dict(items1 + items2).items())
+    x2 = list(kv.merge_items(items1, items2))
+    assert x1 == x2

tests/test_mpwriter.py

         with ix.writer(procs=4, batchsize=10) as w:
             for i in xrange(10):
                 w.add_document(a=u(str(i)))
+
+
+def test_finish_segment():
+    check_multi()
+
+    from whoosh.multiproc import MpWriter
+
+    schema = fields.Schema(a=fields.KEYWORD(stored=True))
+    with TempIndex(schema) as ix:
+        w = MpWriter(ix, procs=2, batchsize=1, multisegment=False,
+                     limitmb=0.00001)
+
+        for i in range(9):
+            w.add_document(a=u(chr(65 + i) * 50))
+
+        w.commit()