coady committed 2598cd1

Hit objects store sort keys.
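For context, a rough sketch of how the stored keys surface through the Python API; the index setup below is hypothetical, and only the hit.keys / __keys__ access reflects this commit:

import lucene
from lupyne import engine

lucene.initVM()

indexer = engine.Indexer()                       # hypothetical in-memory index
indexer.set('amendment', store=True)             # assumed field configuration
for value in '1', '2', '4':
    indexer.add(amendment=value)
indexer.commit()

# With a sort, each hit now carries its sort values as Hit.keys,
# converted from java objects into plain python values.
hits = indexer.search(count=3, sort=engine.SortField('amendment', type=int))
for hit in hits:
    assert isinstance(hit.keys, tuple)           # sort values, e.g. (1,), (2,), (4,)
assert '__keys__' in hits[0].dict()              # dict() now includes the sort keys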

Files changed (6)

lupyne/engine/documents.py

         defaults.update((name, self.getlist(name)) for name in names)
         return defaults
 
+def convert(value):
+    "Return python object from java Object."
+    if not lucene.Number.instance_(value):
+        return value.toString() if lucene.Object.instance_(value) else value
+    value = lucene.Number.cast_(value)
+    return value.doubleValue() if lucene.Float.instance_(value) or lucene.Double.instance_(value) else int(value.longValue())
+
 class Hit(Document):
-    "A Document with an id and score, from a search result."
-    def __init__(self, doc, id, score):
+    "A Document from a search result, with :attr:`id`, :attr:`score`, and optional sort :attr:`keys`."
+    def __init__(self, doc, id, score, keys=()):
         Document.__init__(self, doc)
         self.id, self.score = id, score
+        self.keys = tuple(map(convert, keys))
     def dict(self, *names, **defaults):
-        "Return dict representation of document with __id__ and __score__."
+        "Return dict representation of document with __id__, __score__, and any sort __keys__."
         result = Document.dict(self, *names, **defaults)
         result.update(__id__=self.id, __score__=self.score)
+        if self.keys:
+            result['__keys__'] = self.keys
         return result
 
 class Hits(object):
             scoredocs = self.scoredocs[start:stop] if stop - start < len(self) else self.scoredocs
             return type(self)(self.searcher, scoredocs, self.count, self.maxscore, self.fields)
         scoredoc = self.scoredocs[index]
-        return Hit(self.searcher.doc(scoredoc.doc, self.fields), scoredoc.doc, scoredoc.score)
+        keys = lucene.FieldDoc.cast_(scoredoc).fields if lucene.FieldDoc.instance_(scoredoc) else ()
+        return Hit(self.searcher.doc(scoredoc.doc, self.fields), scoredoc.doc, scoredoc.score, keys)
     @property
     def ids(self):
         return map(operator.attrgetter('doc'), self.scoredocs)
         sort = sort or self.sort
         if sort == lucene.Sort.RELEVANCE:
             scores = maxscore = True
-        collector = lucene.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, sort, count, scores, maxscore, False)
+        collector = lucene.TermSecondPassGroupingCollector(self.field, self.searchgroups, self.sort, sort, count, scores, maxscore, True)
         lucene.IndexSearcher.search(self.searcher, self.query, collector)
         for groupdocs in collector.getTopGroups(0).groups:
             hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits, groupdocs.maxScore, getattr(self, 'fields', None))
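As an aside, a minimal sketch of the new convert helper's behavior, assuming a PyLucene build that exposes the java.lang wrappers (lucene.Long, lucene.Double, lucene.String) alongside the ones used above:

import lucene
from lupyne.engine.documents import convert

lucene.initVM()

# Boxed java numbers become python ints or floats; other java objects fall back to toString().
assert convert(lucene.Long(2)) == 2
assert convert(lucene.Double(0.5)) == 0.5
assert convert(lucene.String('CA')) == 'CA'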

lupyne/engine/indexers.py

             sort = self.sorter(sort, reverse=reverse)
         if not isinstance(sort, lucene.Sort):
             sort = lucene.Sort(sort)
-        return lucene.TopFieldCollector.create(sort, count, False, scores, maxscore, inorder)
+        return lucene.TopFieldCollector.create(sort, count, True, scores, maxscore, inorder)
     def search(self, query=None, filter=None, count=None, sort=None, reverse=False, scores=False, maxscore=False, timeout=None, **parser):
         """Run query and return `Hits`_.
         
             
             :return:
                 | {
-                | "query": *string*,
+                | "query": *string*\|null,
                 | "count": *int*\|null,
                 | "maxscore": *number*\|null,
-                | "docs": [{"__id__": *int*, "__score__": *number*, "__highlights__": {*string*: *array*,... }, *string*: *object*,... },... ],
+                | "docs": [{"__id__": *int*, "__score__": *number*, "__keys__": *array*,
+                    "__highlights__": {*string*: *array*,... }, *string*: *value*,... },... ],
                 | "facets": {*string*: {*string*: *int*,... },... },
                 | "groups": [{"count": *int*, "value": *value*, "docs": [*object*,... ]},... ]
                 | "spellcheck": {*string*: {*string*: [*string*,... ],... },... },
    - Optimized searching and sorting with unlimited count
    - Support for contrib grouping collectors and faceting
    - FieldCache comparators optimized for memory and real-time searching
+   - Caching for sort keys
 
  * Server:
    
         sort = engine.SortField('amendment', type=int)
         hits = indexer.search('text:people', count=5, sort=sort)
         assert [hit.get('amendment') for hit in hits] == [None, None, '1', '2', '4']
+        assert [key for hit in hits for key in hit.keys] == [0, 0, 1, 2, 4]
         assert all(map(math.isnan, hits.scores))
         hits = indexer.search('text:right', count=10**7, sort=sort, scores=True)
         assert not any(map(math.isnan, hits.scores)) and sorted(hits.scores, reverse=True) != hits.scores
             hits = next(grouping.groups())
             assert hits.value == 'CA.Los Angeles' and hits.count > 100 and len(hits) == 1
             hit, = hits
+            assert hit.score in hit.keys
             assert hit['county'] == 'Los Angeles' and hits.maxscore >= hit.score > 0
             hits = next(grouping.groups(count=2, sort=lucene.Sort(indexer.sorter('zipcode')), scores=True))
             assert hits.value == 'CA.Los Angeles' and math.isnan(hits.maxscore) and len(hits) == 2
-            assert all(hit.score > 0 and hit['zipcode'] > '90000' for hit in hits)
+            assert all(hit.score > 0 and hit['zipcode'] > '90000' and hit['zipcode'] in hit.keys for hit in hits)
         for count in (None, len(indexer)):
             hits = indexer.search(query, count=count, timeout=0.01)
             assert 0 <= len(hits) <= indexer.count(query) and hits.count in (None, len(hits)) and hits.maxscore in (None, 1.0)
         assert maxscore == result['maxscore'] and maxscore not in (doc['__score__'] for doc in result['docs'])
         result = resource.get('/search', q='text:people', count=5, sort='-article,amendment:int')
         assert [doc.get('amendment') for doc in result['docs']] == [None, None, '1', '2', '4']
+        assert [doc['__keys__'] for doc in result['docs']] == [['Preamble', 0], ['1', 0], [None, 1], [None, 2], [None, 4]]
         result = resource.get('/search', q='text:people', start=2, count=2, facets='article,amendment')
         assert [doc['amendment'] for doc in result['docs']] == ['10', '1']
         assert result['count'] == sum(sum(facets.values()) for facets in result['facets'].values())