Commits

Jonathan Eunice committed b9d74ea

improved quality of call signature fingerprinting

Comments (0)

Files changed (2)

 
 setup(
     name='mementos',
-    version=verno("0.457"),
+    version=verno("0.464"),
     author='Jonathan Eunice',
     author_email='jonathan.eunice@gmail.com',
     description='Memoizing metaclass. Drop-dead simple way to create cached objects',
     assert b3 is not b1
     assert b3 is not b2
     assert b3.name == 'andy'
-    
+
 @pytest.mark.skipif("sys.version_info < (2,7)")
-def test_perfect_signatures():
+def test_hasher_signatures():
     # use inspect.getcallargs to make signatures that don't vary
     
+    # alternate way of constructing hash key - seems more generally applicable
+    
     from inspect import getcallargs
-    import hashlib
+    from collections import Mapping, Iterable
     
+    def hasher(c):
+        """
+        Return a hash for a collection, even if it's putatively unhashable (i.e.
+        contains any dicts or lists). Does this by traversing the collection and
+        accumulating the hash of its subvalues, then hashing a hashable
+        collection of those sub-hashes. Works for basic types like strings,
+        lists, dicts, and collections of same. Does not try to manage complex
+        objects, unless they are already hashable. Works by hashing the current
+        values; if the collection (or any sub-collection) is mutable and
+        changes, the subsequent hasher() calls on the same object will return
+        different values. Also, all collections of a given type (dict and OrderedDict,
+        say, or list, tuple, and set) are collapsed to the same hash result.
+        Whether this weaker, type-collapsing, value-oriented hash function
+        suits your purposes will depend on the purposes.
+        
+        For canonicalizing call fingerprints for memoizing object creation, it
+        works well, because such instantiation-time memoizing rarely needs to
+        involve complex objects, and instantiation arguments are typically of
+        simple-ish types (often literals), and they tend to crisply define the
+        object's core 'identity'.
+        
+        If you need a slightly stronger version that differentiates dict from
+        OrderedDict, say, you could create a variant that adds type(c) to the
+        constructed tuple before returning the tuple hash.
+        """
+        try:
+            return hash(c)
+        except TypeError:
+            if isinstance(c, Mapping):
+                subhash = []
+                for k in sorted(c.keys()):
+                    subhash.append(hash(k))
+                    subhash.append(hasher(c[k]))
+                return hash(tuple(subhash))
+            elif isinstance(c, Iterable):
+                return hash(tuple(hasher(item) for item in c))
+            else:
+                raise TypeError('cant figure out ' + repr(c))
+            
     def call_fingerprint(cls, args, kwargs):
         """
         Given a complex __init__ call with varied positional, keyword, variable,
         that might otherwise be given positionally that causes key signatures to
         vary, and in which the primary args (ie, not * or ** values) are simple
         scalars.
+        """
+        return hasher(getcallargs(cls.__init__, None, *args, **kwargs))
         
-        In cases where objects or dicts may be passed, or * and ** values are
-        used, a deep recursive flattening and sorting of values and key/value
-        pairs must be done. This 1% of the 1% use case is beyond the scope here.
-        """
-        callargs = getcallargs(cls.__init__, None, *args, **kwargs)
-        callitems = list(callargs.items())
-        callitems.sort()
-        h = hashlib.md5()
-        h.update(repr(callitems).encode('utf-8'))
-        return h.hexdigest()
-        
-    Perfect = memento_factory("Perfect", call_fingerprint)
+    Perfect2 = memento_factory("Perfect", call_fingerprint)
     
-    class PerfectCall(with_metaclass(Perfect, object)):
+    class PerfectCall(with_metaclass(Perfect2, object)):
         def __init__(self, name, a=1, b=2, c=3, *args, **kwargs):
             self.vector = (name, a, b, c)
     
     
     p7 = PerfectCall(**{'name': 'amy', 'b': 33})
     assert p6 is p7
+    
+    p8  = PerfectCall("bill", woot='more!', nixnax=3304)
+    p9  = PerfectCall(**dict(woot='more!', nixnax=3304, name='bill'))
+    p10 = PerfectCall(**dict([('nixnax', 3304), ('woot', 'more!'), ('name', 'bill')]))
+    assert p8 is p9 is p10
+