Jonathan Eunice avatar Jonathan Eunice committed e02bd39

changed hashing strategy

Comments (0)

Files changed (3)

intensional/contenthash.py

+
+
+# from show import show
+import sys
+
+_PY3 = (sys.version_info[0] > 2)
+
+HASH       = 1    # hash ALWAYS considered for primitive types
+ID         = 2
+TYPE       = 4
+CODE       = 8
+ATTRIBUTES = 16
+ITEMS      = 32
+
+DEFAULT_CONSIDER = CODE | ATTRIBUTES | ITEMS
+
+STRING_TYPES = [str, bytes] if _PY3 else [str, unicode]
+PRIMITIVE_TYPES = tuple(STRING_TYPES + [int, float, complex])
+
+FUNC_ATTR = '__code__' if _PY3 else 'func_code'
+
+def chash(obj, considering=DEFAULT_CONSIDER, seen=None):
+    """
+    Return a content hash of the given object.
+    """
+    
+    # show(obj)
+    
+    if seen is None:
+        seen = set()
+        
+    if isinstance(obj, PRIMITIVE_TYPES):
+        return hash(obj)
+    
+    if isinstance(obj, tuple):
+        try:
+            return hash(obj)
+        except TypeError:
+            return hash(tuple([chash(item, considering, seen) for item in obj]))
+        # if tuple is unhashable (ie, contains unhashable things), treat it as
+        # a collection/list
+        
+    # general cyclic item detection
+    _id = id(obj)
+    if _id in seen:
+        return 0
+    seen.add(_id)
+        
+    hashval = 0
+    
+    if considering & HASH:
+        hashval ^= hash(obj)
+        
+    if considering & TYPE:
+        hashval ^= hash(type(obj))
+        
+    if considering & ID:
+        hashval ^= _id    
+
+    if considering & CODE:                    # CODE is the most important attribute, overriding all others
+        code = getattr(obj, FUNC_ATTR, None)
+        if code:
+            hashval ^= hash(code)
+    elif considering & ATTRIBUTES: 
+        try:
+            hashval ^= chash(obj.__dict__, considering, seen)
+        except AttributeError:
+            pass
+    
+    if considering & ITEMS and hasattr(obj, '__iter__'):
+        if hasattr(obj, 'items'):
+            for item in obj.items():
+                hashval ^= chash(item, considering, seen)
+        else:
+            for itemtup in enumerate(obj):
+                hashval ^= chash(itemtup, considering, seen)
+    
+    return hashval
+    # should we use a bit-spreading hash here as a final salting operation?
+    
+    # ideally would use enumerate any time order is important, such as OrderedDict and such,
+    # even if it has an items() attribute
+    # hasattr(o, '__reversed__') may => ordered
+    
+    # for sets and other non-ordered types, we depend on the property that
+    # however constructed and in whatver order, will iterate in a given order
+    # tested - seems to work
+    
+def _demo_chash():
+    from collections import OrderedDict, Counter
+    from show import show
+    
+    class O(object):
+        one = 1
+        two = 2
+            
+    testitems = [
+        1,
+        2.3,
+        2.3+5.6j,
+        'this is a string',
+        (1, 4),
+        ('this', 'that'),
+        O,
+        O(),
+        [1, 2, 3, 4],
+        [1, 2, 3, 4],
+        range(3),
+        xrange(3),
+        xrange(4), 
+        xrange(4),
+        eval('xrange(4)'),
+        lambda x: x + 1,
+        lambda x: x + 1,
+        eval('lambda x: x + 1'),
+        lambda x: x + 2,
+        {'a':1, 'b':12, 'c':99},
+        {'a':1, 'b':12, 'c':99},
+        OrderedDict([('a', 1), ('b', 12), ('c', 99)]),
+        Counter({'a':1, 'b':12, 'c':99}),
+        [1,[2,3,[4,5]]],
+        {'a':1, 'b':12, 'c':99,
+         'od': OrderedDict([('a', 1), ('b', 12), ('c', 99)]),
+         'l': [1,[2,3,[4,5]]],
+         'cc': Counter({'a':1, 'b':12, 'c':99})
+        },
+    ]
+    
+    
+    for item in testitems:
+        try:
+            h = hash(item)
+        except:
+            h = None
+        show(item, chash(item), h)
+    
+# _demo_chash()
+    

intensional/core.py

 # import six
 import sys, copy
 import collections
-from intensional.superhash import superhash
+from intensional.contenthash import chash
 
 if sys.version_info[0] > 2:
     unicode = str
     basestring = str
 
-SuperHashMeta = memento_factory('SuperHashMeta',
-                            lambda cls, args, kwargs: (cls, superhash(args)) )
+HashMeta = memento_factory('HashMeta',
+                            lambda cls, args, kwargs: (cls, chash(args)) )
 
 class IntensionalSet(object):
     """
     def __getitem__(self, index):
         return self._match.group(index)
 
-class Re(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Re(with_metaclass(HashMeta, IntensionalSet)):
     
     # convenience copy of re flag constants
     
     def escape(self, *args, **kwargs):
         return self.re.escape(*args, **kwargs)
 
-class Glob(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Glob(with_metaclass(HashMeta, IntensionalSet)):
     """
     An item matches a Glob via Unix filesystem glob semantics.
     
     def __contains__(self, item):
         return fnmatch.fnmatch(str(item), self.pattern)
 
-class Instances(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Instances(with_metaclass(HashMeta, IntensionalSet)):
     """
     An object is in an IsInstance if it is an instance of the given types.
     """
     return False
 
     
-class Any(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Any(with_metaclass(HashMeta, IntensionalSet)):
     """
     An item is in an Any if it is or is in any member of the set.
     """
                 return True
         return False
     
-class Every(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Every(with_metaclass(HashMeta, IntensionalSet)):
     """
     An item is in an Every if it is or is in every member of the set.
     """
                 return False
         return True
 
-class ButNot(with_metaclass(SuperHashMeta, IntensionalSet)):
+class ButNot(with_metaclass(HashMeta, IntensionalSet)):
     """
     An item is in a ButNot if it's in the primary set and not the exclusion.
     """
     
     # probably need to guard against type errors here
     
-class EitherOr(with_metaclass(SuperHashMeta, IntensionalSet)):
+class EitherOr(with_metaclass(HashMeta, IntensionalSet)):
     """
     An item is in an EitherOr if it's in subseta or subset b, but not both.
     """
         else:
             return False
         
-class Test(with_metaclass(SuperHashMeta, IntensionalSet)):
+class Test(with_metaclass(HashMeta, IntensionalSet)):
     """
     Test is a generic wrapper around lambda expressions.
     Provides special support for compact, neat expressions by not
 
 setup(
     name='intensional',
-    version=verno("0.216"),
+    version=verno("0.219"),
     author='Jonathan Eunice',
     author_email='jonathan.eunice@gmail.com',
     description='Intensional sets in Python',
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.