1. Pypy
  2. Untitled project
  3. pypy


Armin Rigo  committed 39a9093

Avoid using hashes that are directly the address. It gives a few
lower bits that are always 0, leading to many more collisions in
dictionaries. CPython fixed this in 2009 by mangling the value
in a similar way.

  • Participants
  • Parent commits b6be846
  • Branches default

Comments (0)

Files changed (3)

File pypy/rpython/memory/gc/minimark.py

View file
  • Ignore whitespace
 from pypy.rpython.lltypesystem.llmemory import raw_malloc_usage
 from pypy.rpython.memory.gc.base import GCBase, MovingGCBase
 from pypy.rpython.memory.gc import minimarkpage, env
+from pypy.rpython.memory.support import mangle_hash
 from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask, r_uint
 from pypy.rlib.rarithmetic import LONG_BIT_SHIFT
 from pypy.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
         return self.id_or_identityhash(gcobj, False)
     def identityhash(self, gcobj):
-        return self.id_or_identityhash(gcobj, True)
+        return mangle_hash(self.id_or_identityhash(gcobj, True))
     # ----------

File pypy/rpython/memory/lldict.py

View file
  • Ignore whitespace
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.lltypesystem import rdict
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rpython.memory.support import mangle_hash
 # This is a low-level AddressDict, reusing a lot of the logic from rdict.py.
 # xxx this is very dependent on the details of rdict.py
     lltype.free(entries, flavor="raw")
     if not we_are_translated(): count_alloc(-1)
-_hash = llmemory.cast_adr_to_int
+def _hash(adr):
+    return mangle_hash(llmemory.cast_adr_to_int(adr))
 def dict_keyhash(d, key):
     return _hash(key)

File pypy/rpython/memory/support.py

View file
  • Ignore whitespace
 from pypy.rlib.debug import ll_assert
 from pypy.tool.identity_dict import identity_dict
+def mangle_hash(i):
+    # To hash pointers in dictionaries.  Assumes that i shows some
+    # alignment (to 4, 8, maybe 16 bytes), so we use the following
+    # formula to avoid the trailing bits being always 0.
+    return i ^ (i >> 4)
+# ____________________________________________________________