Commits

Matt Chaput  committed c159748

Disabled Searcher.define_facets feature. Added StoredFieldFacet.

  • Participants
  • Parent commits 769cd25
  • Tags 2.0

Comments (0)

Files changed (5)

File docs/source/recipes.rst

 See :doc:`facets`.
 
 
-Speed up sorting/grouping by QueryFacet
----------------------------------------
-::
-
-    # Runtime query facet
-    qfacet = sorting.QueryFacet({"a-m": TermRange("name", "a", "m"),
-                                 "n-z": TermRange("name", "n", "zzzzz")})
-    results = searcher.search(myquery, sortedby=qfacet)
-    
-    # Cache the query facet in a pseudo-field
-    searcher.define_facets("nameranges", qfacet, save=True)
-    
-    # Use the pseudo-field to sort/group instead of the QueryFacet
-    results = searcher.search(myquery, sortedby="nameranges")
-
-
 Results
 =======
 

File docs/source/releases/2_0.rst

 * Removed the experimental ``TermTrackingCollector`` since it is replaced by
   the new built-in term recording functionality.
 
+* Removed the experimental ``Searcher.define_facets`` feature. A more robust
+  and useful replacement is planned for a future release.
+
 * Reader iteration methods (``__iter__``, ``iter_from``, ``iter_field``, etc.)
   now yield :class:`whoosh.reading.TermInfo` objects.
 

File src/whoosh/searching.py

         
         return sorting.Sorter(self, *args, **kwargs)
 
-    def define_facets(self, name, qs, save=False):
-        """This is an experimental feature which may change in future versions.
+#    def define_facets(self, name, facet, save=False):
+#        """This is an experimental feature which may change in future versions.
+#        
+#        Adds a field cache for a synthetic field defined by a
+#        :class:`whoosh.sorting.FacetType` object, for example a
+#        :class:`~whoosh.sorting.QueryFacet` or
+#        :class:`~whoosh.sorting.RangeFacet`.
+#        
+#        For example, sorting using a :class:`~whoosh.sorting.QueryFacet`
+#        recomputes the queries at sort time, which may be slow::
+#        
+#            qfacet = sorting.QueryFacet({"a-z": TermRange(...
+#            results = searcher.search(myquery, sortedby=qfacet)
+#            
+#        You can cache the results of the query facet in a field cache and use
+#        the pseudo-field for sorting::
+#        
+#            searcher.define_facets("nameranges", qfacet, save=True)
+#            
+#            results = searcher.search(myquery, sortedby="nameranges")
+#        
+#        See :doc:`/facets`.
+#        
+#        :param name: a name for the pseudo-field to cache the query results in.
+#        :param facet: a :class:`~whoosh.sorting.FacetType` object.
+#        :param save: if True, saves the field cache to disk so it is persistent
+#            across searchers. The default is False, which only creates the
+#            field cache in memory.
+#        """
+#        
+#        
+#        if self.subsearchers:
+#            ss = self.subsearchers
+#        else:
+#            ss = [(self, 0)]
+#        
+#        for s, offset in ss:
+#            doclists = defaultdict(list)
+#            catter = facet.categorizer(self)
+#            catter.set_searcher(s, offset)
+#            for docnum in xrange(s.doc_count_all()):
+#                key = catter.key_for_id(docnum)
+#                doclists[key].append(docnum)
+#            s.reader().define_facets(name, doclists, save=save)
         
-        Adds a field cache for a synthetic field defined by a dictionary of
-        queries. This creates a persistent cache to speed up a
-        :class:`whoosh.sorting.QueryFacet`. You can then use the new "field"
-        for sorting and/or faceting.
-        
-        For example, sorting using a :class:`~whoosh.sorting.QueryFacet`
-        recomputes the queries at sort time, which may be slow::
-        
-            qfacet = sorting.QueryFacet({"a-z": TermRange(...
-            results = searcher.search(myquery, sortedby=qfacet)
-            
-        You can cache the results of the query facet in a field cache and use
-        the pseudo-field for sorting::
-        
-            searcher.define_facets("nameranges", qfacet, save=True)
-            
-            results = searcher.search(myquery, sortedby="nameranges")
-        
-        See :doc:`/facets`.
-        
-        :param name: a name for the pseudo-field to cache the query results in.
-        :param qs: a QueryFacet object or dictionary mapping key values to
-            :class:`whoosh.query.Query` objects.
-        :param save: if True, saves the field cache to disk so it is persistent
-            across searchers. The default is False, which only creates the
-            field cache in memory.
-        """
-        
-        if isinstance(qs, sorting.QueryFacet):
-            qs = qs.querydict
-        
-        def doclists_for_searcher(s):
-            return dict((key, q.docs(s)) for key, q in qs.items())
-        
-        if self.subsearchers:
-            for s in self.subsearchers:
-                dls = doclists_for_searcher(s)
-                s.reader().define_facets(name, dls, save=save)
-        else:
-            dls = doclists_for_searcher(self)
-            self.ixreader.define_facets(name, dls, save=save)
-    
     def docs_for_query(self, q):
         """Returns an iterator of document numbers for documents matching the
         given :class:`whoosh.query.Query` object.

File src/whoosh/sorting.py

                                   timedelta_to_usecs)
 
 
-# Legacy sorting object
-
-class Sorter(object):
-    """This is a legacy interface. The functionality of the Sorter object was
-    moved into the :class:`FacetType` classes and the
-    :class:`whoosh.searching.Collector` in Whoosh 2.0. The old Sorter API is
-    still supported for backwards-compatibility, but it simply forwards to the
-    new API.
-    
-    See :doc:`/facets` for information on the new API.
-    """
-
-    def __init__(self, searcher):
-        self.searcher = searcher
-        self.multi = MultiFacet()
-        
-    def add_field(self, fieldname, reverse=False):
-        self.multi.add_field(fieldname, reverse=reverse)
-    
-    def sort_query(self, q, limit=None, reverse=False, filter=None, mask=None,
-                   groupedby=None):
-        from whoosh.searching import Collector
-        
-        collector = Collector(limit=limit, groupedby=groupedby)
-        return collector.sort(self.searcher, q, self.multi, reverse=reverse,
-                              allow=filter, restrict=mask)
-    
-
 # Faceting objects
 
 class FacetType(object):
         
         def key_for_id(self, docid):
             return self.fn(self.searcher, docid + self.offset)
+
+
+class StoredFieldFacet(FacetType):
+    """Lets you sort/group using the value in an unindexed, stored field (e.g.
+    STORED). This is slower than using an indexed field.
+    """
     
+    def __init__(self, fieldname):
+        self.fieldname = fieldname
+    
+    def categorizer(self, searcher):
+        return self.StoredFieldCategorizer(self.fieldname)
+    
+    class StoredFieldCategorizer(Categorizer):
+        def __init__(self, fieldname):
+            self.fieldname = fieldname
+        
+        def set_searcher(self, searcher, docoffset):
+            self.searcher = searcher
+        
+        def key_for_id(self, docid):
+            fields = self.searcher.stored_fields(docid)
+            return fields[self.fieldname]
+
 
 class MultiFacet(FacetType):
     """Sorts/facets by the combination of multiple "sub-facets".
 
 
 
+# Legacy sorting object
 
+class Sorter(object):
+    """This is a legacy interface. The functionality of the Sorter object was
+    moved into the :class:`FacetType` classes and the
+    :class:`whoosh.searching.Collector` in Whoosh 2.0. The old Sorter API is
+    still supported for backwards-compatibility, but it simply forwards to the
+    new API.
+    
+    See :doc:`/facets` for information on the new API.
+    """
 
+    def __init__(self, searcher):
+        self.searcher = searcher
+        self.multi = MultiFacet()
+        
+    def add_field(self, fieldname, reverse=False):
+        self.multi.add_field(fieldname, reverse=reverse)
+    
+    def sort_query(self, q, limit=None, reverse=False, filter=None, mask=None,
+                   groupedby=None):
+        from whoosh.searching import Collector
+        
+        collector = Collector(limit=limit, groupedby=groupedby)
+        return collector.sort(self.searcher, q, self.multi, reverse=reverse,
+                              allow=filter, restrict=mask)
 
+
+
+
+
+
+
+

File tests/test_sorting.py

     check(make_single_index)
     check(make_multi_index)
 
-def test_define_facets():
-    schema = fields.Schema(value=fields.ID(stored=True))
-    with TempIndex(schema, "queryfacets") as ix:
-        w = ix.writer()
-        alphabet = list(u("abcdefghijklmnopqrstuvwxyz"))
-        random.shuffle(alphabet)
-        
-        for letter in alphabet:
-            w.add_document(value=letter)
-        w.commit()
-        
-        with ix.searcher() as s:
-            q1 = query.TermRange("value", u("a"), u("i"))
-            q2 = query.TermRange("value", u("j"), u("r"))
-            q3 = query.TermRange("value", u("s"), u("z"))
-            s.define_facets("range", {"a-i": q1, "j-r": q2, "s-z": q3},
-                            save=False)
-            
-            def check(groups):
-                for key in groups.keys():
-                    groups[key] = "".join(sorted([s.stored_fields(id)["value"]
-                                                  for id in groups[key]]))
-                assert_equal(groups, {'a-i': u('abcdefghi'), 'j-r': u('jklmnopqr'),
-                                      's-z': u('stuvwxyz')})
-            
-            check(s.search(query.Every(), groupedby="range").groups("range"))
-
-        with ix.searcher() as s:
-            assert not s.reader().fieldcache_available("range")
+#def test_define_facets():
+#    schema = fields.Schema(value=fields.ID(stored=True))
+#    with TempIndex(schema, "queryfacets") as ix:
+#        w = ix.writer()
+#        alphabet = list(u("abcdefghijklmnopqrstuvwxyz"))
+#        random.shuffle(alphabet)
+#        
+#        for letter in alphabet:
+#            w.add_document(value=letter)
+#        w.commit()
+#        
+#        with ix.searcher() as s:
+#            q1 = query.TermRange("value", u("a"), u("i"))
+#            q2 = query.TermRange("value", u("j"), u("r"))
+#            q3 = query.TermRange("value", u("s"), u("z"))
+#            qfacet = sorting.QueryFacet({"a-i": q1, "j-r": q2, "s-z": q3})
+#            s.define_facets("range", qfacet, save=False)
+#            
+#            def check(groups):
+#                for key in groups.keys():
+#                    groups[key] = "".join(sorted([s.stored_fields(id)["value"]
+#                                                  for id in groups[key]]))
+#                assert_equal(groups, {'a-i': u('abcdefghi'), 'j-r': u('jklmnopqr'),
+#                                      's-z': u('stuvwxyz')})
+#            
+#            check(s.search(query.Every(), groupedby="range").groups("range"))
+#
+#        with ix.searcher() as s:
+#            assert not s.reader().fieldcache_available("range")
 
 def test_multifacet():
     schema = fields.Schema(tag=fields.ID(stored=True),