Commits

coady committed 6738014

Comparators implemented as chained arrays for extensibility.

  • Participants
  • Parent commits bedaad0

Comments (0)

Files changed (3)

File lupyne/engine/indexers.py

         return sorter if sorter.reverse == reverse else SortField(sorter.field, sorter.typename, sorter.parser, reverse)
     def comparator(self, field, type='string', parser=None):
         "Return :meth:`IndexReader.comparator` using a cached `SortField`_ if available."
-        sorter = self.sorter(field, type, parser)
-        if not hasattr(sorter, 'cache'):
-            sorter.cache = sorter.comparator(self.indexReader)
-        return sorter.cache
+        return self.sorter(field, type, parser).comparator(self.indexReader)
     def distances(self, lng, lat, lngfield, latfield):
         "Return distance comparator computed from cached lat/lng fields."
         arrays = (self.comparator(field, 'double') for field in (lngfield, latfield))

File lupyne/engine/queries.py

         "Discard a few term values."
         self.update(values, op='andNot', cache=False)
 
class Comparator(object):
    """Chained arrays with bisection lookup.

    Presents a series of arrays as a single read-only sequence without
    copying their contents.  Supports ``len``, iteration, and integer
    indexing; negative indices count from the end, as with built-in
    sequences.
    """
    def __init__(self, arrays):
        "Store the arrays and precompute the starting offset of each one."
        self.arrays = list(arrays)
        # offsets[i] is the global index of the first item of arrays[i];
        # the final entry is the combined length of all arrays.
        self.offsets = [0]
        for array in self.arrays:
            self.offsets.append(self.offsets[-1] + len(array))
    def __len__(self):
        "Return the combined length of all arrays."
        return self.offsets[-1]
    def __iter__(self):
        "Iterate over the items of every array in order."
        return itertools.chain.from_iterable(self.arrays)
    def __getitem__(self, index):
        "Return the item at a global index, raising IndexError if out of range."
        size = len(self)
        if index < 0:
            # Normalize so the bisection below only ever sees non-negative positions.
            index += size
        if not 0 <= index < size:
            raise IndexError('comparator index out of range')
        # bisect_right skips past empty arrays, whose offsets repeat.
        point = bisect.bisect_right(self.offsets, index) - 1
        return self.arrays[point][index - self.offsets[point]]
+
 class SortField(lucene.SortField):
     """Inherited lucene SortField used for caching FieldCache parsers.
     
         if lucene.MultiReader.instance_(reader):
             readers = itertools.chain.from_iterable(reader.sequentialSubReaders for reader in readers)
         arrays = list(map(self.array, readers))
-        if len(arrays) <= 1:
-            return arrays[0]
-        cls, = set(map(type, arrays))
-        index, result = 0, cls(sum(map(len, arrays)))
-        for array in arrays:
-            lucene.System.arraycopy(array, 0, result, index, len(array))
-            index += len(array)
-        return result
+        return arrays[0] if len(arrays) <= 1 else Comparator(arrays)
     def filter(self, start, stop, lower=True, upper=False):
         "Return lucene FieldCacheRangeFilter based on field and type."
         method = getattr(lucene.FieldCacheRangeFilter, 'new{0}Range'.format(self.typename))
     def terms(self, filter, *readers):
         "Generate field cache terms from docs which match filter from all segments."
         for reader in readers:
-            array, it = self.array(reader), filter.getDocIdSet(reader).iterator()
-            try:
-                while True:
-                    yield array[it.nextDoc()]
-            except IndexError:
-                pass
+            array, docset = self.array(reader), filter.getDocIdSet(reader)
+            for id in iter(docset.iterator().nextDoc, lucene.DocIdSetIterator.NO_MORE_DOCS):
+                yield array[id]
 
 class Highlighter(lucene.Highlighter):
     """Inherited lucene Highlighter with stored analysis options.

File test/local.py

         assert searcher.count() == len(searcher) == 2 * len(indexer)
         searcher.sorters['amendment'] = engine.SortField('amenmdment', int)
         comparator = searcher.comparator('amendment')
-        assert comparator is searcher.comparator('amendment') and set(map(type, comparator)) == set([int])
+        assert set(map(type, comparator)) == set([int])
         assert searcher is searcher.reopen()
         assert searcher.facets(lucene.MatchAllDocsQuery(), 'amendment')['amendment'] == dict.fromkeys(map(str, range(1, 28)), 2)
         reader = searcher.indexReader
         indexer.add()
         indexer.commit(sorters=True)
         cache = len(lucene.FieldCache.DEFAULT.cacheEntries)
-        assert indexer.comparator('year')[-1] == 0
+        assert list(indexer.comparator('year'))[-1] == 0
         assert cache == len(lucene.FieldCache.DEFAULT.cacheEntries)
     
     def testNumeric(self):