Commits

Matt Chaput committed 4470a88

Added FacetMap objects and the "maptype" argument to Searcher.search() and
FacetType constructors, which allows much greater control over how facet groups are
recorded.
Cleaned up and updated documentation.
Fixes issue #199.

  • Participants
  • Parent commits 03475e7

Comments (0)

Files changed (5)

File docs/source/api/sorting.rst

 .. autoclass:: Facets
     :members:
 
+
+FacetMap objects
+================
+
+.. autoclass:: FacetMap
+    :members:
+.. autoclass:: OrderedList
+.. autoclass:: UnorderedList
+.. autoclass:: Count
+.. autoclass:: Best
+
+

File docs/source/facets.rst

 
 A ``FacetType`` object
     Uses this object to group the documents. See below for the available facet
-    types. The facet name will automatically be set to ``"facet"``.
+    types.
 
 A field name string
     Converts the field name into a ``FieldFacet`` (see below) and uses it to
     sort the documents. The name of the field is used as the facet name.
 
+A list or tuple of field name strings
+    Sets up multiple field grouping criteria.
+
 A dictionary mapping facet names to FacetType objects
-    Sets up multiple grouping crieteria.
+    Sets up multiple grouping criteria.
 
 A ``Facets`` object
     This object is a lot like using a dictionary, but has some convenience
     methods to make setting up multiple groupings a little easier.
 
+
 Examples
 --------
 
 
     cats = sorting.FieldFacet("category")
     tags = sorting.FieldFacet("tags", allow_overlap=True)
-    results = searcher.search(myquery, groupedby={"cats": cats, "tags": tags})
+    results = searcher.search(myquery, groupedby={"category": cats, "tags": tags})
     
     # ...or, using a Facets object has a little less duplication
     facets = sorting.Facets()
 --------------------------
 
 The ``Results.groups("facetname")`` method returns a dictionary mapping
-category names to lists of **document IDs**.
+category names to lists of **document IDs**::
 
-    {"small": [1, 2, 4, 5, 8], "medium": [0, 3, 6], "large": [7, 9]}
+    myfacets = sorting.Facets().add_field("size").add_field("tag")
+    results = mysearcher.search(myquery, groupedby=myfacets)
+    results.groups("size")
+    # {"small": [8, 5, 1, 2, 4], "medium": [3, 0, 6], "large": [7, 9]}
 
-The ``Searcher`` object's ``stored_fields()`` method takes a document number
-and returns the document's stored fields as a dictionary::
+If there is only one facet, you can just use ``Results.groups()`` with no
+argument to access its groups::
+
+    results = mysearcher.search(myquery, groupedby=myfunctionfacet)
+    results.groups()
+
+By default, the values in the dictionary returned by ``groups()`` are lists of
+document numbers in the same relative order as in the results. You can use the
+``Searcher`` object's ``stored_fields()`` method to take a document number and
+return the document's stored fields as a dictionary::
 
     for category_name in categories:
         print "Top 5 documents in the %s category" % category_name
         if len(doclist) > 5:
             print "  (%s more)" % (len(doclist) - 5)
 
-(You can use ``Searcher.stored_fields(docnum)`` to get the stored fields
-associated with a document number.)
+If you want different information about the groups, for example just the count
+of documents in each group, or you don't need the groups to be ordered, you can
+specify a :class:`whoosh.sorting.FacetMap` type or instance with the
+``maptype`` keyword argument when creating the ``FacetType``::
 
-If you just want to **count** the number of documents in each group, instead of
-generating a full list of the documents, use the ``groupids=False`` keyword
-argument::
+    # This is the same as the default
+    myfacet = sorting.FieldFacet("size", maptype=sorting.OrderedList)
+    results = mysearcher.search(myquery, groupedby=myfacet)
+    results.groups()
+    # {"small": [8, 5, 1, 2, 4], "medium": [3, 0, 6], "large": [7, 9]}
+    
+    # Don't sort the groups to match the order of documents in the results
+    # (faster)
+    myfacet = sorting.FieldFacet("size", maptype=sorting.UnorderedList)
+    results = mysearcher.search(myquery, groupedby=myfacet)
+    results.groups()
+    # {"small": [1, 2, 4, 5, 8], "medium": [0, 3, 6], "large": [7, 9]}
 
-    results = searcher.search(myquery, groupedby="size")
-    groups = results.groups("size")
+    # Only count the documents in each group
+    myfacet = sorting.FieldFacet("size", maptype=sorting.Count)
+    results = mysearcher.search(myquery, groupedby=myfacet)
+    results.groups()
     # {"small": 5, "medium": 3, "large": 2}
+    
+    # Only remember the "best" document in each group
+    myfacet = sorting.FieldFacet("size", maptype=sorting.Best)
+    results = mysearcher.search(myquery, groupedby=myfacet)
+    results.groups()
+    # {"small": 8, "medium": 3, "large": 7}
 
-To generate multiple groupings, you can name multiple fields in the list you
-pass to the `groups` keyword::
+Alternatively you can specify a ``maptype`` argument in the
+``Searcher.search()`` method call which applies to all facets::
 
-    # Generate separate groupings for the "tag" and "size" fields
-    results = searcher.search(myquery, groupedby=["tag", "size"])
-    
-    # Get the groupings by "tag"
-    tag_groups = results.groups("tag")
-    
-    # Get the groupings by "size"
-    size_groups = results.groups("size")
+    results = mysearcher.search(myquery, groupedby=["size", "tag"],
+                                maptype=sorting.Count)
+
+(You can override this overall ``maptype`` argument on individual facets by
+specifying the ``maptype`` argument for them as well.)
 
 
 Facet types

File src/whoosh/searching.py

                 yield docnum
 
     def search(self, q, limit=10, sortedby=None, reverse=False, groupedby=None,
-               optimize=True, filter=None, mask=None, groupids=True,
-               terms=False):
+               optimize=True, filter=None, mask=None, terms=False,
+               maptype=None):
         """Runs the query represented by the ``query`` object and returns a
         Results object.
         
             will only contain documents that are also in the filter object.
         :param mask: a query, Results object, or set of docnums. The results
             will not contain documents that are also in the mask object.
-        :param groupids: by default, faceting groups map keys to lists of
-            document numbers associated with that key. To map to a simple count
-            of the number of documents instead of a list, use
-            ``groupids=False``.
         :param terms: if True, record which terms were found in each matching
             document. You can use :meth:`Results.contains_term` or
             :meth:`Hit.contains_term` to check whether a hit contains a
             particular term.
+        :param maptype: by default, the result of faceting with ``groupedby``
+            is a dictionary mapping group names to ordered lists of document
+            numbers in the group. You can pass a
+            :class:`whoosh.sorting.FacetMap` subclass to this keyword argument
+            to specify a different (usually faster) method for grouping. For
+            example, ``maptype=sorting.Count`` would store only the count of
+            documents in each group, instead of the full list of document IDs.
         :rtype: :class:`Results`
         """
 
             raise ValueError("limit must be >= 1")
 
         collector = Collector(limit=limit, usequality=optimize,
-                              groupedby=groupedby, groupids=groupids,
-                              terms=terms)
+                              groupedby=groupedby, terms=terms,
+                              maptype=maptype)
 
         if sortedby:
             return collector.sort(self, q, sortedby, reverse=reverse,
     """
 
     def __init__(self, limit=10, usequality=True, groupedby=None,
-                 groupids=True, timelimit=None, greedy=False, terms=False,
-                 replace=10):
+                 timelimit=None, greedy=False, terms=False, replace=10,
+                 maptype=None):
         """
         :param limit: the maximum number of hits to collect. If this is None,
             collect all hits.
         :param usequality: whether to use block quality optimizations when
             available. This is mostly useful for debugging purposes.
         :param groupedby: see :doc:`/facets` for information.
-        :param groupids: if True, saves lists of document IDs for facets. If
-            False, only saves a count of the number of documents in each group.
         :param timelimit: the maximum amount of time (in possibly fractional
             seconds) to allow for searching. If the search takes longer than
             this, it will raise a ``TimeLimit`` exception.
         :param greedy: if ``True``, the collector will finish adding the most
             recent hit before raising the ``TimeLimit`` exception.
         :param terms: if ``True``, record which terms matched in each document.
+        :param maptype: a :class:`whoosh.sorting.FacetMap` type to use for all
+            facets that don't specify their own.
         """
 
         self.limit = limit
         self.replace = replace
         self.timelimit = timelimit
         self.greedy = greedy
-        self.groupids = groupids
+        self.maptype = maptype
         self.termlists = defaultdict(set) if terms else None
 
         self.facets = None
         return scorefn
 
     def _set_categorizers(self, searcher, offset):
-        groups = self.groups
         if self.facets:
-            self.categorizers = dict((name, facet.categorizer(searcher))
-                                     for name, facet in self.facets.items())
-
-            for name, catter in self.categorizers.items():
-                if self.groupids and name not in groups:
-                    groups[name] = defaultdict(list)
-                elif name not in groups:
-                    groups[name] = defaultdict(int)
-
+            self.categorizers = {}
+            for name, facet in self.facets.items():
+                catter = facet.categorizer(searcher)
                 catter.set_searcher(searcher, offset)
+                self.categorizers[name] = catter
 
     def _set_filters(self, allow, restrict):
         if allow:
             self.timer.start()
 
     def _reset(self):
-        self.groups = {}
+        self.facetmaps = {}
         self.items = []
         self.timedout = False
         self.runtime = -1
         self.minscore = None
+        if self.facets:
+            self.facetmaps = dict((facetname, facet.map(self.maptype))
+                                  for facetname, facet in self.facets.items())
+        else:
+            self.facetmaps = {}
 
     def _timestop(self):
         # Called by the Timer when the time limit expires. Set an attribute on
         self.timer = None
         self.timedout = True
 
-    def _add_to_group(self, name, key, offsetid, sortkey):
-        if self.groupids:
-            self.groups[name][key].append((sortkey, offsetid))
-        else:
-            self.groups[name][key] += 1
-
     def collect(self, id, offsetid, sortkey):
         docset = self.docset
         if docset is not None:
             docset.add(offsetid)
 
         if self.facets is not None:
-            add = self._add_to_group
             for name, catter in self.categorizers.items():
+                add = self.facetmaps[name].add
                 if catter.allow_overlap:
                     for key in catter.keys_for_id(id):
-                        add(name, catter.key_to_name(key), offsetid, sortkey)
+                        add(catter.key_to_name(key), offsetid, sortkey)
                 else:
                     key = catter.key_to_name(catter.key_for_id(id))
-                    add(name, key, offsetid, sortkey)
+                    add(key, offsetid, sortkey)
 
     def search(self, searcher, q, allow=None, restrict=None):
         """Top-level method call which uses the given :class:`Searcher` and
             items = sorted(self.items, reverse=reverse)
 
         return Results(self.searcher, self.q, items, self.docset,
-                       groups=self.groups, runtime=self.runtime,
+                       facetmaps=self.facetmaps, runtime=self.runtime,
                        filter=self.allow, mask=self.restrict,
                        termlists=self.termlists)
 
     so keeps all files used by it open.
     """
 
-    def __init__(self, searcher, q, top_n, docset, groups=None, runtime= -1,
+    def __init__(self, searcher, q, top_n, docset, facetmaps=None, runtime= -1,
                  filter=None, mask=None, termlists=None, highlighter=None):
         """
         :param searcher: the :class:`Searcher` object that produced these
         self.q = q
         self.top_n = top_n
         self.docset = docset
-        self._groups = groups or {}
+        self._facetmaps = facetmaps or {}
         self.runtime = runtime
         self._filter = filter
         self._mask = mask
 
         return self.searcher.stored_fields(self.top_n[n][1])
 
-    def groups(self, name):
-        """If you generating groupings for the results by using the `groups`
-        keyword to the `search()` method, you can use this method to retrieve
-        the groups.
+    def facet_names(self):
+        """Returns the available facet names, for use with the ``groups()``
+        method.
+        """
+
+        return self._facetmaps.keys()
+
+    def groups(self, name=None):
+        """If you generated facet groupings for the results using the
+        ``groupedby`` keyword argument to the ``search()`` method, you can use
+        this method to retrieve the groups. You can use the ``facet_names()``
+        method to get the list of available facet names.
         
-        >>> results = searcher.search(my_query, groups=["tag"])
+        >>> results = searcher.search(my_query, groupedby=["tag", "price"])
+        >>> results.facet_names()
+        ["tag", "price"]
         >>> results.groups("tag")
+        {"new": [12, 1, 4], "apple": [3, 10, 5, 0], "search": [11]}
         
-        Returns a dictionary mapping category names to lists of document IDs.
+        If you only used one facet, you can call the method without a facet
+        name to get the groups for the facet.
         
-        >>> groups = results.groups("tag")
-        >>> groups['new']
-        set([1, 4, 12])
+        >>> results = searcher.search(my_query, groupedby="tag")
+        >>> results.groups()
+        {"new": [12, 1, 4], "apple": [3, 10, 5, 0], "search": [11]}
+        
+        By default, this returns a dictionary mapping category names to a list
+        of document numbers, in the same relative order as they appear in the
+        results.
+        
+        >>> results = mysearcher.search(myquery, groupedby="tag")
+        >>> docnums = results.groups()
+        >>> docnums['new']
+        [12, 1, 4]
         
         You can then use :meth:`Searcher.stored_fields` to get the stored
         fields associated with a document ID.
+        
+        If you specified a different ``maptype`` for the facet when you
+        searched, the values in the dictionary depend on the
+        :class:`whoosh.sorting.FacetMap`.
+        
+        >>> myfacet = sorting.FieldFacet("tag", maptype=sorting.Count)
+        >>> results = mysearcher.search(myquery, groupedby=myfacet)
+        >>> results.groups()
+        {"new": 3, "apple": 4, "search": 1}
         """
 
-        if name not in self._groups:
-            raise KeyError("%r not in group names %r"
-                           % (name, self._groups.keys()))
-        # Sort the groups and remove the sort keys before returning them
-        groups = self._groups[name]
-        d = {}
-        for key, items in iteritems(groups):
-            d[key] = [docnum for _, docnum in sorted(items)]
-        return d
+        if (name is None or name == "facet") and len(self._facetmaps) == 1:
+            name = self._facetmaps.keys()[0]
+        elif name not in self._facetmaps:
+            raise KeyError("%r not in facet names %r"
+                           % (name, self.facet_names()))
+        return self._facetmaps[name].as_dict()
 
     def _load_docs(self):
         # If self.docset is None, that means this results object was created

File src/whoosh/sorting.py

 # policies, either expressed or implied, of Matt Chaput.
 
 from array import array
+from collections import defaultdict
 
-from whoosh.compat import string_type, u, xrange
+from whoosh.compat import string_type, u, xrange, iteritems
 from whoosh.fields import DEFAULT_LONG
 from whoosh.support.times import (long_to_datetime, datetime_to_long,
                                   timedelta_to_usecs)
     """Base class for "facets", aspects that can be sorted/faceted.
     """
 
+    maptype = None
+
     def categorizer(self, searcher):
         """Returns a :class:`Categorizer` corresponding to this facet.
         """
 
         raise NotImplementedError
 
+    def map(self, default=None):
+        t = self.maptype
+        if t is None:
+            t = default
+
+        if t is None:
+            return OrderedList()
+        elif type(t) is type:
+            return t()
+        else:
+            return t
+
+    def default_name(self):
+        return "facet"
+
 
 class Categorizer(object):
     """Base class for categorizer objects which compute a key value for a
     This facet returns different categorizers based on the field type.
     """
 
-    def __init__(self, fieldname, reverse=False, allow_overlap=False):
+    def __init__(self, fieldname, reverse=False, allow_overlap=False,
+                 maptype=None):
         """
         :param fieldname: the name of the field to sort/facet on.
         :param reverse: if True, when sorting, reverse the sort order of this
         self.fieldname = fieldname
         self.reverse = reverse
         self.allow_overlap = allow_overlap
+        self.maptype = maptype
+
+    def default_name(self):
+        return self.fieldname
 
     def categorizer(self, searcher):
         from whoosh.fields import NUMERIC, DATETIME
     """Sorts/facets based on the results of a series of queries.
     """
 
-    def __init__(self, querydict, other=None, allow_overlap=False):
+    def __init__(self, querydict, other=None, allow_overlap=False,
+                 maptype=None):
         """
         :param querydict: a dictionary mapping keys to
             :class:`whoosh.query.Query` objects.
 
         self.querydict = querydict
         self.other = other
+        self.maptype = maptype
 
     def categorizer(self, searcher):
         return self.QueryCategorizer(self.querydict, self.other)
     at the end.
     """
 
-    def __init__(self, fieldname, start, end, gap, hardend=False):
+    def __init__(self, fieldname, start, end, gap, hardend=False,
+                 maptype=None):
         """
         :param fieldname: the numeric field to sort/facet on.
         :param start: the start of the entire range.
         self.end = end
         self.gap = gap
         self.hardend = hardend
+        self.maptype = maptype
         self._queries()
 
+    def default_name(self):
+        return self.fieldname
+
     def _rangetype(self):
         from whoosh import query
 
         lengths = FunctionFacet(fn)
     """
 
-    def __init__(self, fn):
+    def __init__(self, fn, maptype=None):
         self.fn = fn
+        self.maptype = maptype
 
     def categorizer(self, searcher):
         return self.FunctionCategorizer(searcher, self.fn)
     if one is supplied).
     """
 
-    def __init__(self, fieldname, allow_overlap=False, split_fn=None):
+    def __init__(self, fieldname, allow_overlap=False, split_fn=None,
+                 maptype=None):
         """
         :param fieldname: the name of the stored field.
         :param allow_overlap: if True, when grouping, allow documents to appear
         self.fieldname = fieldname
         self.allow_overlap = allow_overlap
         self.split_fn = None
+        self.maptype = maptype
+
+    def default_name(self):
+        return self.fieldname
 
     def categorizer(self, searcher):
         return self.StoredFieldCategorizer(self.fieldname, self.allow_overlap,
                          "n-z": TermRange("name", "n", "z")})
     """
 
-    def __init__(self, items=None):
+    def __init__(self, items=None, maptype=None):
         self.facets = []
         if items:
             for item in items:
                 self._add(item)
+        self.maptype = maptype
 
     @classmethod
     def from_sortedby(cls, sortedby):
         elif isinstance(groupedby, string_type):
             facets.add_field(groupedby)
         elif isinstance(groupedby, FacetType):
-            facets.add_facet("facet", groupedby)
+            facets.add_facet(groupedby.default_name(), groupedby)
         elif isinstance(groupedby, (list, tuple)):
             for item in groupedby:
                 facets.add_facets(cls.from_groupedby(item))
 
         return facets
 
+    def names(self):
+        """Returns an iterator of the facet names in this object.
+        """
+
+        return iter(self.facets)
+
     def items(self):
         """Returns a list of (facetname, facetobject) tuples for the facets in
         this object.
 
         return self.facets.items()
 
-    def add_field(self, fieldname, allow_overlap=False):
+    def add_field(self, fieldname, **kwargs):
         """Adds a :class:`FieldFacet` for the given field name (the field name
         is automatically used as the facet name).
         """
 
-        self.facets[fieldname] = FieldFacet(fieldname,
-                                            allow_overlap=allow_overlap)
+        self.facets[fieldname] = FieldFacet(fieldname, **kwargs)
         return self
 
-    def add_query(self, name, querydict, other=None, allow_overlap=False):
+    def add_query(self, name, querydict, **kwargs):
         """Adds a :class:`QueryFacet` under the given ``name``.
         
         :param name: a name for the facet.
             :class:`whoosh.query.Query` objects.
         """
 
-        self.facets[name] = QueryFacet(querydict, other=other,
-                                       allow_overlap=allow_overlap)
+        self.facets[name] = QueryFacet(querydict, **kwargs)
         return self
 
     def add_facet(self, name, facet):
         return self
 
 
+# Objects for holding facet groups
+
+class FacetMap(object):
+    """Base class for objects holding the results of grouping search results by
+    a Facet. Use an object's ``as_dict()`` method to access the results.
+    
+    You can pass a subclass of this to the ``maptype`` keyword argument when
+    creating a ``FacetType`` object to specify what information the facet
+    should record about the group. For example::
+    
+        # Record each document in each group in its sorted order
+        myfacet = FieldFacet("size", maptype=OrderedList)
+        
+        # Record only the count of documents in each group
+        myfacet = FieldFacet("size", maptype=Count)
+    """
+
+    def add(self, groupname, docid, sortkey):
+        """Adds a document to the facet results.
+        
+        :param groupname: the name of the group to add this document to.
+        :param docid: the document number of the document to add.
+        :param sortkey: a value representing the sort position of the document
+            in the full results.
+        """
+
+        raise NotImplementedError
+
+    def as_dict(self):
+        """Returns a dictionary object mapping group names to
+        implementation-specific values. For example, the value might be a list
+        of document numbers, or an integer representing the number of documents
+        in the group.
+        """
+
+        raise NotImplementedError
+
+
+class OrderedList(FacetMap):
+    """Stores a list of document numbers for each group, in the same order as
+    they appear in the search results.
+    
+    The ``as_dict`` method returns a dictionary mapping group names to lists
+    of document numbers.
+    """
+
+    def __init__(self):
+        self.dict = defaultdict(list)
+
+    def add(self, groupname, docid, sortkey):
+        self.dict[groupname].append((sortkey, docid))
+
+    def as_dict(self):
+        d = {}
+        for key, items in iteritems(self.dict):
+            d[key] = [docnum for _, docnum in sorted(items)]
+        return d
+
+
+class UnorderedList(FacetMap):
+    """Stores a list of document numbers for each group, in arbitrary order.
+    This is slightly faster and uses less memory than
+    :class:`OrderedList` if you don't care about the ordering of the
+    documents within groups.
+    
+    The ``as_dict`` method returns a dictionary mapping group names to lists
+    of document numbers.
+    """
+
+    def __init__(self):
+        self.dict = defaultdict(list)
+
+    def add(self, groupname, docid, sortkey):
+        self.dict[groupname].append(docid)
+
+    def as_dict(self):
+        return dict(self.dict)
+
+
+class Count(FacetMap):
+    """Stores the number of documents in each group.
+    
+    The ``as_dict`` method returns a dictionary mapping group names to
+    integers.
+    """
+
+    def __init__(self):
+        self.dict = defaultdict(int)
+
+    def add(self, groupname, docid, sortkey):
+        self.dict[groupname] += 1
+
+    def as_dict(self):
+        return dict(self.dict)
+
+
+class Best(FacetMap):
+    """Stores the "best" document in each group (that is, the one that appears
+    highest in the results).
+    
+    The ``as_dict`` method returns a dictionary mapping group names to
+    document numbers.
+    """
+
+    def __init__(self):
+        self.bestids = {}
+        self.bestkeys = {}
+
+    def add(self, groupname, docid, sortkey):
+        if groupname not in self.bestids or sortkey < self.bestkeys[groupname]:
+            self.bestids[groupname] = docid
+            self.bestkeys[groupname] = sortkey
+
+    def as_dict(self):
+        return self.bestids
+
+
 #
 #
 #

File tests/test_sorting.py

         assert_equal(gs["apple"], [4, 2, 0])
         assert_equal(gs["bear"], [5, 3, 1])
 
+def test_group_types():
+    schema = fields.Schema(a=fields.STORED, b=fields.TEXT, c=fields.ID)
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(a=0, b=u("blah"), c=u("apple"))
+        w.add_document(a=1, b=u("blah blah"), c=u("bear"))
+        w.add_document(a=2, b=u("blah blah blah"), c=u("apple"))
+        w.add_document(a=3, b=u("blah blah blah blah"), c=u("bear"))
+        w.add_document(a=4, b=u("blah blah blah blah blah"), c=u("apple"))
+        w.add_document(a=5, b=u("blah blah blah blah blah blah"), c=u("bear"))
+        w.add_document(a=6, b=u("blah blah blah blah blah blah blah"), c=u("apple"))
 
+    with ix.searcher() as s:
+        q = query.Term("b", "blah")
 
+        f = sorting.FieldFacet("c", maptype=sorting.UnorderedList)
+        r = s.search(q, groupedby=f)
+        gs = r.groups("c")
+        assert_equal(gs["apple"], [0, 2, 4, 6])
+        assert_equal(gs["bear"], [1, 3, 5])
 
+        f = sorting.FieldFacet("c", maptype=sorting.Count)
+        r = s.search(q, groupedby=f)
+        gs = r.groups("c")
+        assert_equal(gs["apple"], 4)
+        assert_equal(gs["bear"], 3)
 
+        f = sorting.FieldFacet("c", maptype=sorting.Best)
+        r = s.search(q, groupedby=f)
+        gs = r.groups()
+        assert_equal(gs["apple"], 6)
+        assert_equal(gs["bear"], 5)
+
+        r = s.search(q, groupedby="c", maptype=sorting.Count)
+        gs = r.groups()
+        assert_equal(gs["apple"], 4)
+        assert_equal(gs["bear"], 3)
+
+
+