Commits

Anonymous committed 3106cc0

Drop murmur3 code
tab cleanup
use a single random seed again

  • Participants
  • Parent commits 1cbd524
  • Branches randomhash

Comments (0)

Files changed (4)

File Include/pyhash.h

 #ifndef Py_PYHASH_H
 #define Py_PYHASH_H
 
-#define PY_RND_HASH_SEED_SIZE 8
-
-PyAPI_DATA(uint32_t) Py_RndHashSeed[PY_RND_HASH_SEED_SIZE];
-PyAPI_FUNC(uint32_t *) Py_GetRndHashSeed(void);
-
-/* move to unicodeobject.h ? */
-Py_hash_t _Py_HashUnicode(PyObject *);
+PyAPI_DATA(Py_hash_t) Py_RndHashSeed;
+PyAPI_FUNC(Py_hash_t) Py_GetRndHashSeed(void);
 
 #endif

File Include/pyrandom.h

+#ifndef Py_PYRANDOM_H
+#define Py_PYRANDOM_H
+
 int PyOS_URandom(unsigned char *buf, Py_ssize_t len);
 
 #define _Py_MT_N 624
 void _Py_MT_GenRand_Init(_Py_MT_RandomState *state, unsigned long seed); // init_genrand()
 void _Py_MT_GenRand_InitArray(_Py_MT_RandomState *state, unsigned long init_key[], unsigned long key_length); // init_by_array
 
+#endif

File Python/hash.c

 #include "Python.h"
 #include "pyrandom.h"
-#define SEED_SIZE 8
 
-uint32_t Py_RndHashSeed[SEED_SIZE] = {-1};
+Py_hash_t Py_RndHashSeed = -1;
 
 void
 _Py_InitRndHashSeed(void)
 {
-	static int initialized = 0;
-	int result;
-	if (initialized) {
-		return;
-	}
-	initialized = 1;
+    static int initialized = 0;
+    int result;
+    if (initialized) {
+        return;
+    }
+    initialized = 1;
 
-	result = PyOS_URandom((unsigned char*)Py_RndHashSeed, SEED_SIZE * 4);
-	/* fall back to Mersenne twister */
-	if (result == -1) {
-		_Py_MT_RandomState state;
-		time_t seed;
-		int i;
+    result = PyOS_URandom((unsigned char*)&Py_RndHashSeed, sizeof(Py_hash_t));
+    /* fall back to Mersenne twister */
+    if (result == -1) {
+        _Py_MT_RandomState state;
+        time_t seed;
+        int i;
 
-		time(&seed);
-		_Py_MT_GenRand_Init(&state, seed);
-		for (i=0; i < SEED_SIZE; i++) {
-			Py_RndHashSeed[i] = _Py_MT_GenRand_Int32(&state);
-		}
-		if (Py_RndHashSeed[0] == -1) {
-			Py_RndHashSeed[0] = -2;
-		}
+        time(&seed);
+        _Py_MT_GenRand_Init(&state, seed);
 
-	}
+        Py_RndHashSeed = _Py_MT_GenRand_Int32(&state);
+        if (Py_RndHashSeed == -1) {
+            Py_RndHashSeed = -2;
+        }
+
+    }
 }
 
-
-
-/* forward declarations */
-void MurmurHash3_x86_32  (const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out);
-void MurmurHash3_x64_64  (const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out);
-void MurmurHash3_x86_128 (const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out);
-void MurmurHash3_x64_128 (const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out);
-
-#if SIZEOF_SIZE_T == 4
-#define MURMUR_HASH_3(key, len, out) MurmurHash3_x86_32(key, len, Py_RndHashSeed, out)
-#else
-#define MURMUR_HASH_3(key, len, out) MurmurHash3_x64_64(key, len, Py_RndHashSeed, out)
-#endif
-
 /* Randomized hash seed */
-uint32_t *
+Py_hash_t
 Py_GetRndHashSeed(void)
 {
-	return Py_RndHashSeed;
+    return Py_RndHashSeed;
 }
-
-
-Py_hash_t
-_Py_HashBytes(unsigned char *p, Py_ssize_t len)
-{
-    Py_uhash_t x;
-
-    MURMUR_HASH_3(p, len, &x);
-
-    return x;
-}
-
-Py_hash_t
-_Py_HashUnicode(PyObject *ob)
-{
-	Py_ssize_t len;
-    Py_uhash_t x;
-
-    assert(PyUnicode_CheckExact(ob));
-
-    len = PyUnicode_GET_LENGTH(ob);
-
-    switch (PyUnicode_KIND(ob)) {
-    case PyUnicode_1BYTE_KIND: {
-        const unsigned char *c = PyUnicode_1BYTE_DATA(ob);
-        MURMUR_HASH_3(c, len, &x);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(ob);
-        MURMUR_HASH_3(s, 2 * len, &x);
-        break;
-    }
-    default: {
-        Py_UCS4 *l;
-        assert(PyUnicode_KIND(ob) == PyUnicode_4BYTE_KIND &&
-               "Impossible switch case in unicode_hash");
-        l = PyUnicode_4BYTE_DATA(ob);
-        MURMUR_HASH_3(l, 4 * len, &x);
-        break;
-    }
-    }
-    return x;
-}
-
-void
-MurmurHash3_x64_64(const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out)
-{
-        uint64_t tmp[2];
-
-        MurmurHash3_x64_128(key, len, seed, tmp);
-
-        *(uint64_t*)out = tmp[0];
-}
-
-/* MurMurHash3 modified by Christian Heimes
- *
- * - C++ to C89 port
- * - Replaced some simple inline functions with macros
- * - Py_ssize_t instead of int
- * - Increased seed size for 128 bit hash
- * - Additional seed for tail
- * - simple MurmurHash3_x64_64()
- */
-
-/*-----------------------------------------------------------------------------
- * MurmurHash3 was written by Austin Appleby, and is placed in the public
- * domain. The author hereby disclaims copyright to this source code.
- *
- * Note - The x86 and x64 versions do _not_ produce the same results, as the
- * algorithms are optimized for their respective platforms. You can still
- * compile and run any of them on any platform, but your performance with the
- * non-native version will be less than optimal.
- *
- *-----------------------------------------------------------------------------
- * Platform-specific functions and macros
- */
-
-/* Microsoft Visual Studio */
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE  __forceinline
-
-#define ROTL32(x,y)  _rotl(x,y)
-#define ROTL64(x,y)  _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-#else
-/* Other compilers */
-
-#define FORCE_INLINE __attribute__((always_inline))
-
-/*
-inline uint32_t
-rotl32( uint32_t x, int8_t r)
-{
-    return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t
-rotl64( uint64_t x, int8_t r)
-{
-    return (x << r) | (x >> (64 - r));
-}
-*/
-
-#define ROTL32(x, r)  (x << r) | (x >> (32 - r))
-#define ROTL64(x, r)  (x << r) | (x >> (64 - r))
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif
-/*-----------------------------------------------------------------------------
- * Block read - if your platform needs to do endian-swapping or can only
- * handle aligned reads, do the conversion here
- */
-
-/*
-FORCE_INLINE uint32_t
-getblock32(const uint32_t * p, Py_ssize_t i) {
-    return p[i];
-}
-
-FORCE_INLINE uint64_t
-getblock64(const uint64_t * p, Py_ssize_t i) {
-    return p[i];
-}
-*/
-
-#define getblock32(p, i) p[i]
-#define getblock64(p, i) p[i]
-
-/*-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
- */
-
-FORCE_INLINE uint32_t
-fmix32(uint32_t h) {
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-
-    return h;
-}
-
-FORCE_INLINE uint64_t
-fmix64(uint64_t k) {
-    k ^= k >> 33;
-    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
-    k ^= k >> 33;
-    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
-    k ^= k >> 33;
-
-    return k;
-}
-
-void
-MurmurHash3_x86_32(const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out) {
-    const uint8_t * data = (const uint8_t*) key;
-    const Py_ssize_t nblocks = len / 4;
-    Py_ssize_t i;
-
-    uint32_t h1 = seed[0];
-
-    uint32_t c1 = 0xcc9e2d51;
-    uint32_t c2 = 0x1b873593;
-
-    /* body */
-
-    const uint32_t * blocks = (const uint32_t *) (data + nblocks * 4);
-
-    for (i = -nblocks; i; i++) {
-        uint32_t k1 = getblock32(blocks, i);
-
-        k1 *= c1;
-        k1 = ROTL32(k1, 15);
-        k1 *= c2;
-
-        h1 ^= k1;
-        h1 = ROTL32(h1, 13);
-        h1 = h1 * 5 + 0xe6546b64;
-    }
-
-    /* tail */
-
-    const uint8_t * tail = (const uint8_t*) (data + nblocks * 4);
-
-    uint32_t k1 = seed[4];
-
-    switch (len & 3) {
-    case 3:
-        k1 ^= tail[2] << 16;
-    case 2:
-        k1 ^= tail[1] << 8;
-    case 1:
-        k1 ^= tail[0];
-        k1 *= c1;
-        k1 = ROTL32(k1, 15);
-        k1 *= c2;
-        h1 ^= k1;
-    };
-
-    /* finalization */
-
-    h1 ^= len;
-
-    h1 = fmix32(h1);
-
-    *(uint32_t*)out = h1;
-}
-
-
-void
-MurmurHash3_x86_128(const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out)
-{
-    const uint8_t * data = (const uint8_t*) key;
-    const Py_ssize_t nblocks = len / 16;
-    Py_ssize_t i;
-
-    uint32_t h1 = seed[0];
-    uint32_t h2 = seed[1];
-    uint32_t h3 = seed[2];
-    uint32_t h4 = seed[3];
-
-    uint32_t c1 = 0x239b961b;
-    uint32_t c2 = 0xab0e9789;
-    uint32_t c3 = 0x38b34ae5;
-    uint32_t c4 = 0xa1e38b93;
-
-    /* body */
-
-    const uint32_t * blocks = (const uint32_t *) (data + nblocks * 16);
-
-    for (i = -nblocks; i; i++) {
-        uint32_t k1 = getblock32(blocks, i * 4 + 0);
-        uint32_t k2 = getblock32(blocks, i * 4 + 1);
-        uint32_t k3 = getblock32(blocks, i * 4 + 2);
-        uint32_t k4 = getblock32(blocks, i * 4 + 3);
-
-        k1 *= c1;
-        k1 = ROTL32(k1, 15);
-        k1 *= c2;
-        h1 ^= k1;
-
-        h1 = ROTL32(h1, 19);
-        h1 += h2;
-        h1 = h1 * 5 + 0x561ccd1b;
-
-        k2 *= c2;
-        k2 = ROTL32(k2, 16);
-        k2 *= c3;
-        h2 ^= k2;
-
-        h2 = ROTL32(h2, 17);
-        h2 += h3;
-        h2 = h2 * 5 + 0x0bcaa747;
-
-        k3 *= c3;
-        k3 = ROTL32(k3, 17);
-        k3 *= c4;
-        h3 ^= k3;
-
-        h3 = ROTL32(h3, 15);
-        h3 += h4;
-        h3 = h3 * 5 + 0x96cd1c35;
-
-        k4 *= c4;
-        k4 = ROTL32(k4, 18);
-        k4 *= c1;
-        h4 ^= k4;
-
-        h4 = ROTL32(h4, 13);
-        h4 += h1;
-        h4 = h4 * 5 + 0x32ac3b17;
-    }
-
-    /* tail */
-
-    const uint8_t * tail = (const uint8_t*) (data + nblocks * 16);
-
-    uint32_t k1 = seed[4];
-    uint32_t k2 = seed[5];
-    uint32_t k3 = seed[6];
-    uint32_t k4 = seed[7];
-
-    switch (len & 15) {
-    case 15:
-        k4 ^= tail[14] << 16;
-    case 14:
-        k4 ^= tail[13] << 8;
-    case 13:
-        k4 ^= tail[12] << 0;
-        k4 *= c4;
-        k4 = ROTL32(k4, 18);
-        k4 *= c1;
-        h4 ^= k4;
-
-    case 12:
-        k3 ^= tail[11] << 24;
-    case 11:
-        k3 ^= tail[10] << 16;
-    case 10:
-        k3 ^= tail[9] << 8;
-    case 9:
-        k3 ^= tail[8] << 0;
-        k3 *= c3;
-        k3 = ROTL32(k3, 17);
-        k3 *= c4;
-        h3 ^= k3;
-
-    case 8:
-        k2 ^= tail[7] << 24;
-    case 7:
-        k2 ^= tail[6] << 16;
-    case 6:
-        k2 ^= tail[5] << 8;
-    case 5:
-        k2 ^= tail[4] << 0;
-        k2 *= c2;
-        k2 = ROTL32(k2, 16);
-        k2 *= c3;
-        h2 ^= k2;
-
-    case 4:
-        k1 ^= tail[3] << 24;
-    case 3:
-        k1 ^= tail[2] << 16;
-    case 2:
-        k1 ^= tail[1] << 8;
-    case 1:
-        k1 ^= tail[0] << 0;
-        k1 *= c1;
-        k1 = ROTL32(k1, 15);
-        k1 *= c2;
-        h1 ^= k1;
-    };
-
-    /* finalization */
-
-    h1 ^= len;
-    h2 ^= len;
-    h3 ^= len;
-    h4 ^= len;
-
-    h1 += h2;
-    h1 += h3;
-    h1 += h4;
-    h2 += h1;
-    h3 += h1;
-    h4 += h1;
-
-    h1 = fmix32(h1);
-    h2 = fmix32(h2);
-    h3 = fmix32(h3);
-    h4 = fmix32(h4);
-
-    h1 += h2;
-    h1 += h3;
-    h1 += h4;
-    h2 += h1;
-    h3 += h1;
-    h4 += h1;
-
-    ((uint32_t*)out)[0] = h1;
-    ((uint32_t*)out)[1] = h2;
-    ((uint32_t*)out)[2] = h3;
-    ((uint32_t*)out)[3] = h4;
-}
-
-
-void
-MurmurHash3_x64_128(const void * key, const Py_ssize_t len, const uint32_t seed[8], void * out)
-{
-	const uint8_t * data = (const uint8_t*)key;
-    const Py_ssize_t nblocks = len / 16;
-    Py_ssize_t i;
-
-    uint64_t h1 = seed[0] + ((uint64_t)seed[1] << 32);
-    uint64_t h2 = seed[2] + ((uint64_t)seed[3] << 32);
-
-    uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
-    uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
-    /* body */
-
-    const uint64_t * blocks = (const uint64_t *) (data);
-
-    for (i = 0; i < nblocks; i++) {
-        uint64_t k1 = getblock64(blocks, i * 2 + 0);
-        uint64_t k2 = getblock64(blocks, i * 2 + 1);
-
-        k1 *= c1;
-        k1 = ROTL64(k1, 31);
-        k1 *= c2;
-        h1 ^= k1;
-
-        h1 = ROTL64(h1, 27);
-        h1 += h2;
-        h1 = h1 * 5 + 0x52dce729;
-
-        k2 *= c2;
-        k2 = ROTL64(k2, 33);
-        k2 *= c1;
-        h2 ^= k2;
-
-        h2 = ROTL64(h2, 31);
-        h2 += h1;
-        h2 = h2 * 5 + 0x38495ab5;
-    }
-
-    /* tail */
-
-    const uint8_t * tail = (const uint8_t*) (data + nblocks * 16);
-
-    uint64_t k1 = seed[4] + ((uint64_t)seed[5] << 32);;
-    uint64_t k2 = seed[6] + ((uint64_t)seed[7] << 32);;
-
-    switch (len & 15) {
-    case 15:
-        k2 ^= (uint64_t)(tail[14]) << 48;
-    case 14:
-        k2 ^= (uint64_t)(tail[13]) << 40;
-    case 13:
-        k2 ^= (uint64_t)(tail[12]) << 32;
-    case 12:
-        k2 ^= (uint64_t)(tail[11]) << 24;
-    case 11:
-        k2 ^= (uint64_t)(tail[10]) << 16;
-    case 10:
-        k2 ^= (uint64_t)(tail[9]) << 8;
-    case 9:
-        k2 ^= (uint64_t)(tail[8]) << 0;
-        k2 *= c2;
-        k2 = ROTL64(k2, 33);
-        k2 *= c1;
-        h2 ^= k2;
-
-    case 8:
-        k1 ^= (uint64_t)(tail[7]) << 56;
-    case 7:
-        k1 ^= (uint64_t)(tail[6]) << 48;
-    case 6:
-        k1 ^= (uint64_t)(tail[5]) << 40;
-    case 5:
-        k1 ^= (uint64_t)(tail[4]) << 32;
-    case 4:
-        k1 ^= (uint64_t)(tail[3]) << 24;
-    case 3:
-        k1 ^= (uint64_t)(tail[2]) << 16;
-    case 2:
-        k1 ^= (uint64_t)(tail[1]) << 8;
-    case 1:
-        k1 ^= (uint64_t)(tail[0]) << 0;
-        k1 *= c1;
-        k1 = ROTL64(k1, 31);
-        k1 *= c2;
-        h1 ^= k1;
-    };
-
-    /* finalization */
-
-    h1 ^= len;
-    h2 ^= len;
-
-    h1 += h2;
-    h2 += h1;
-
-    h1 = fmix64(h1);
-    h2 = fmix64(h2);
-
-    h1 += h2;
-    h2 += h1;
-
-    ((uint64_t*)out)[0] = h1;
-    ((uint64_t*)out)[1] = h2;
-}
-
-/* end of Murmur3 code
-  -----------------------------------------------------------------------------
-*/
-

File Python/random.c

 
 /* raise a fatal error when the interpreter is in the early initialization
  * phase
- * XXX: replace with something better than Py_RndHashSeed[0] == -1
+ * XXX: replace with something better than Py_RndHashSeed == -1
  * */
 
 #define error_string(exc, msg) do {          \
-	if (Py_RndHashSeed[0] == -1) {           \
+    if (Py_RndHashSeed == -1) {           \
         Py_FatalError(msg);                  \
     } else {                                 \
-	    PyErr_SetString(exc, msg);           \
+        PyErr_SetString(exc, msg);           \
     }                                        \
     } while(0)
 
 #define error_errno(exc, filename, msg) do { \
-	if (Py_RndHashSeed[0] == -1) {           \
+    if (Py_RndHashSeed == -1) {           \
         Py_FatalError(msg);                  \
     } else {                                 \
-	    PyErr_SetFromErrnoWithFilename(exc, filename); \
+        PyErr_SetFromErrnoWithFilename(exc, filename); \
     }                                        \
     } while(0)
 
 #ifdef MS_WINDOWS
 
 #define win32_error(function, filename) do { \
-	if (Py_RndHashSeed[0] == -1) {           \
+    if (Py_RndHashSeed == -1) {           \
         Py_FatalError(function);             \
     } else {                                 \
-	    errno = GetLastError();              \
-	    PyErr_SetFromWindowsErr(errno)       \
+        errno = GetLastError();              \
+        PyErr_SetFromWindowsErr(errno)       \
     }                                        \
     } while(0)
 
 PyOS_URandom(unsigned char *buf, Py_ssize_t len)
 {
     if (len < 0) {
-    	error_string(PyExc_ValueError,
-    			     "negative argument not allowed");
-    	return -1;
+        error_string(PyExc_ValueError,
+                     "negative argument not allowed");
+        return -1;
     }
 
     if (hCryptProv == 0) {
            This should not fail         */
         hAdvAPI32 = GetModuleHandle("advapi32.dll");
         if (hAdvAPI32 == NULL) {
-        	win32_error("GetModuleHandle", NULL);
-        	return -1
+            win32_error("GetModuleHandle", NULL);
+            return -1
         }
 
         /* Obtain pointers to the CryptoAPI functions
                                         hAdvAPI32,
                                         "CryptAcquireContextA");
         if (pCryptAcquireContext == NULL) {
-        	error_string(PyExc_NotImplementedError,
+            error_string(PyExc_NotImplementedError,
                         "CryptAcquireContextA not found");
             return -1;
         }
         pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
                                         hAdvAPI32, "CryptGenRandom");
         if (pCryptGenRandom == NULL) {
-        	error_string(PyExc_NotImplementedError,
+            error_string(PyExc_NotImplementedError,
                          "CryptGenRandom not found");
             return -1;
         }
  */
 int
 PyOS_URandom(unsigned char *buf, Py_ssize_t len) {
-	int fd;
-	ssize_t pos, result = 0;
+    int fd;
+    ssize_t pos, result = 0;
 
     if (len < 0) {
-    	error_string(PyExc_ValueError,
+        error_string(PyExc_ValueError,
                      "negative argument not allowed");
-    	return -1;
+        return -1;
     }
 
-	if ((fd = open(DEV_URANDOM, O_RDONLY)) == -1) {
-		error_errno(PyExc_OSError, DEV_URANDOM, "Can't open /dev/urandom");
-		return -1;
-	}
+    if ((fd = open(DEV_URANDOM, O_RDONLY)) == -1) {
+        error_errno(PyExc_OSError, DEV_URANDOM, "Can't open /dev/urandom");
+        return -1;
+    }
 
-	while (pos < len) {
-		if ((result = read(fd, buf+pos, len-pos)) == -1) {
-			close(fd);
-			error_errno(PyExc_OSError, DEV_URANDOM, "Error reading from /dev/urandom");
-			return -1;
-		}
-		pos += result;
-	}
-	close(fd);
-	return 0;
+    while (pos < len) {
+        if ((result = read(fd, buf+pos, len-pos)) == -1) {
+            close(fd);
+            error_errno(PyExc_OSError, DEV_URANDOM, "Error reading from /dev/urandom");
+            return -1;
+        }
+        pos += result;
+    }
+    close(fd);
+    return 0;
 }
 
 #endif