Commits

php committed 8beca4f

libcuckoo - A cuckoo hashing library for C

  • Participants

Comments (0)

Files changed (7)

+*.a
+*.o
+*.swp
+cuckoo
+sources = cuckoo.c hash.c
+
+cuckoo: cuckoo.a
+	gcc -ggdb -std=c99 -lm -o $@ main.c cuckoo.a
+
+cuckoo.a: $(sources)
+	gcc -ggdb -c -std=c99 -lm $(sources)
+	ar rs $@ $(sources:.c=.o)
+
+
+clean:
+	rm cuckoo.a
+	rm cuckoo
+
+.PHONY: clean
+/*
+ * cuckoo hashing library
+ *
+ * (c) 2011 David Soria Parra
+ */
+#define _BSD_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cuckoo.h"
+#include "hash.h"
+
+#define EPSILON 0.25
+#define h1(t, k) ck_hash(t->size, t->h1, k)
+#define h2(t, k) ck_hash(t->size, t->h2, k)
+
+/* forward declaration */
+static int ck_hash_rehash(ck_hash_t *t);
+
+static
+uint32_t ck_hash (uint32_t length, uint32_t r, const char *key) {
+    uint32_t k;
+    k = hash(key, length);
+
+    return (k + r) % length;
+}
+
+static inline void ck_reinit_hashfunc(ck_hash_t *t)
+{
+    int f;
+    f = open("/dev/random", O_RDONLY);
+    read(f, &t->h1, sizeof(uint32_t));
+    read(f, &t->h2, sizeof(uint32_t));
+    close(f);
+}
+
+static inline int
+ck_hash_new_bucket(ck_hash_t *t, ck_elm_t *e)
+{
+    uint32_t k, i;
+    void *tmp;
+
+    /* we abort if we use more than log(n) iterations. runtime is now
+     * as bad as binary search (O(log n))*/
+    for (i = 0; i < t->abort; i++) {
+        k = h2(t, e->key);
+
+        if (NULL == t->buckets[k]) {
+            t->buckets[k] = e;
+            return 0;
+        }
+
+        k = h1(t, e->key);
+        if (NULL == t->buckets[k]) {
+            t->buckets[k] = e;
+            return 0;
+        } else {
+            tmp = t->buckets[k];
+            t->buckets[k] = e;
+            e = tmp;
+        }
+    }
+
+    if (NULL == t->buckets[k]) {
+        t->buckets[k] = e;
+        return 0;
+    }
+
+    ck_hash_rehash(t);
+    return ck_hash_new_bucket(t, e);
+}
+
+static int
+ck_hash_rehash(ck_hash_t *t)
+{
+    int f;
+    uint32_t i;
+    ck_elm_t ** obuckets;
+
+    /* new buckets */
+    obuckets = t->buckets;
+
+    t->buckets = (ck_elm_t **) malloc(t->size * sizeof(ck_elm_t *));
+
+retry:
+    memset(t->buckets, 0, t->size * sizeof(ck_elm_t *));
+    ck_reinit_hashfunc(t);
+    /* reinitialize hash functions */
+
+    for (i = 0; i < t->size; i++) {
+        uint32_t k;
+        ck_elm_t * old;
+
+        old = obuckets[i];
+        if (NULL == old) {
+            continue;
+        }
+
+        k = h1(t, old->key);
+        if (NULL == t->buckets[k]) {
+            t->buckets[k] = obuckets[i];
+        } else {
+            k = h2(t, old->key);
+            if (NULL == t->buckets[k]) {
+                t->buckets[k] = obuckets[i];
+            } else if (ck_hash_new_bucket(t, t->buckets[k]) < 0) {
+                goto retry;
+            }
+        }
+    }
+
+    return 0;
+}
+
+void
+ck_hash_init(ck_hash_t *t, uint32_t size)
+{
+    int f;
+
+    memset(t, 0, sizeof(ck_hash_t));
+    t->size  = (uint32_t) 2 * (1 + EPSILON) * size;
+    t->abort = ceil(log(t->size));
+
+    ck_reinit_hashfunc(t);
+
+    t->buckets = (ck_elm_t **) malloc(t->size * sizeof(ck_elm_t *));
+    memset(t->buckets, 0, t->size * sizeof(ck_elm_t *));
+
+    if (!t->buckets) {
+        perror("cannot allocate memory");
+        exit(-1);
+    }
+
+}
+
+int
+ck_hash_insert(ck_hash_t *t, char * key, void * element)
+{
+    uint32_t k;
+    ck_elm_t * e;
+
+    e = (ck_elm_t *) malloc(sizeof(ck_elm_t));
+    e->key = key; //strdup(key);
+    e->elm = element;
+
+    k = h1(t, key);
+    assert(k < t->size);
+
+    if (t->buckets[k]) {
+        return ck_hash_new_bucket(t, e);
+    } else {
+        t->buckets[k] = e;
+    }
+
+    return 0;
+}
+
+void *
+ck_hash_lookup(ck_hash_t *t, const char * key)
+{
+    uint32_t k;
+
+    k = h1(t, key);
+
+    assert(k < t->size);
+    if (t->buckets[k] && strcmp(t->buckets[k]->key, key) == 0) {
+        return t->buckets[k]->elm;
+    }
+
+    k = h2(t, key);
+    assert(k < t->size);
+    if (t->buckets[k] && strcmp(t->buckets[k]->key, key) == 0) {
+        return t->buckets[k]->elm;
+    }
+
+    return NULL;
+}
+
+int
+ck_hash_remove(ck_hash_t *t, const char * key)
+{
+    /* do not forget to free the bucket element */
+}
+
+void
+ck_hash_destroy(ck_hash_t *t)
+{
+    for (t->size--; t->size >= 0; t->size--) {
+        if (t->buckets[t->size]) {
+            free(t->buckets[t->size]->key);
+            free(t->buckets[t->size]);
+        }
+    }
+    free(t->buckets);
+}
+#ifndef CUCKOO_H
+#define CUCKOO_H
+#include <stdint.h>
+
+#define MAX_LOOP 100
+
+typedef struct ck_elm {
+    char * key;
+    void * elm;
+} ck_elm_t;
+
+typedef struct ck_hash {
+    ck_elm_t ** buckets;
+    uint32_t size;
+    uint32_t abort;
+    uint32_t h1;
+    uint32_t h2;
+} ck_hash_t;
+
+void ck_hash_init(ck_hash_t *, uint32_t);
+int ck_hash_insert(ck_hash_t *, char * key, void * element);
+void* ck_hash_lookup(ck_hash_t *, const char * key);
+int ck_hash_remove(ck_hash_t *, const char * key);
+void ck_hash_destroy(ck_hash_t *);
+
+#endif /* end of include guard: CUCKOO_H */
+#include <stdint.h>
+#include "hash.h"
+/*
+ * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
+ *
+ * This is Daniel J. Bernstein's popular `times 33' hash function as
+ * posted by him years ago on comp.lang.c. It basically uses a function
+ * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
+ * known hash functions for strings. Because it is both computed very
+ * fast and distributes very well.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as RSE did now) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well. They
+ * all distribute in an acceptable way and this way fill a hash table
+ * with an average percent of approx. 86%. 
+ *
+ * If one compares the Chi^2 values of the variants, the number 33 not
+ * even has the best value. But the number 33 and a few other equally
+ * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
+ * advantage to the remaining numbers in the large set of possible
+ * multipliers: their multiply operation can be replaced by a faster
+ * operation based on just one shift plus either a single addition
+ * or subtraction operation. And because a hash function has to both
+ * distribute good _and_ has to be very fast to compute, those few
+ * numbers should be preferred and seems to be the reason why Daniel J.
+ * Bernstein also preferred it.
+ *
+ *
+ *                  -- Ralf S. Engelschall <rse@engelschall.com>
+ */
+
+inline uint32_t hash(const char *arKey, uint32_t nKeyLength)
+{
+	register uint32_t hash = 5381;
+
+	/* variant with the hash unrolled eight times */
+	for (; nKeyLength >= 8; nKeyLength -= 8) {
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+	}
+	switch (nKeyLength) {
+		case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 1: hash = ((hash << 5) + hash) + *arKey++; break;
+		case 0: break;
+        default: break;
+	}
+	return hash;
+}
+#ifndef HASH_H
+#define HASH_H
+
+uint32_t hash(const char *, uint32_t);
+
+#endif /* end of include guard: HASH_H */
+#include <stdlib.h>
+#include <stdio.h>
+#include "cuckoo.h"
+
+int main(void) {
+    int a[] = {5, 10, 23, 42};
+    int * b;
+    ck_hash_t t;
+    ck_hash_init(&t, 3);
+    ck_hash_insert(&t, "hello", (void*) &a[0]);
+    ck_hash_insert(&t, "test", (void*) &a[1]);
+    ck_hash_insert(&t, "foob", (void*) &a[2]);
+    b = (int*) ck_hash_lookup(&t, "hello");
+    printf("%d\n", *b);
+    b = (int*) ck_hash_lookup(&t, "foob");
+    printf("%d\n", *b);
+    return 0;
+}