1. Andy Gross
  2. bashohash

Commits

Andy Gross  committed 0590f4c

add murmur hash

  • Participants
  • Parent commits a364084
  • Branches default

Comments (0)

Files changed (4)

File c_src/efnv_nifs.c

View file
  • Ignore whitespace
 #include "efnv_nifs.h"
 #include "fnv.h"
-
+#include "murmur.h"
 
 ERL_NIF_TERM efnv_hash(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-
+ERL_NIF_TERM murmur_hash(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
 
 static ErlNifFunc nif_funcs[] = {
-    {"efnv_hash", 2, efnv_hash}
+    {"efnv_hash", 2, efnv_hash},
+    {"murmur_hash", 2, murmur_hash}
 };
 
 ERL_NIF_INIT(efnv, nif_funcs, NULL, NULL, NULL, NULL);
     result = fnv_hash((const void *)bin_term.data, bin_term.size, seed);
     return enif_make_uint(env, result);
 }
+
+ERL_NIF_TERM murmur_hash(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 
+    ErlNifBinary bin_term;
+    unsigned int seed;
+    unsigned int result = 0;
+    if (!enif_inspect_binary(env, argv[0], &bin_term)) 
+        return enif_make_badarg(env);
+    if (!enif_get_uint(env, argv[1], &seed))
+        return enif_make_badarg(env);
+    result = MurmurHash2((const void *)bin_term.data, bin_term.size, seed);
+    return enif_make_uint(env, result);
+}

File c_src/murmur.c

View file
  • Ignore whitespace
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes one assumption about how your machine behaves -

// 1. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

// Each 4-byte word of the key is copied byte by byte into a local
// unsigned int, so the key may start at any address: no misaligned or
// type-punned load is performed. The bytes keep their in-memory order, so
// the word has the same value a direct native-endian 4-byte read would give.

// Parameters:
//   key  - first byte of the data to hash; it is only read.
//   len  - number of bytes to hash. A value of zero or less consumes no
//          bytes, but len is still mixed into the initial hash state.
//   seed - starting value for the hash state.
//
// Returns the hash as an unsigned int.

unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.

	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	// Initialize the hash to a 'random' value

	unsigned int h = seed ^ len;

	// Mix 4 bytes at a time into the hash

	const unsigned char * data = (const unsigned char *)key;

	while(len >= 4)
	{
		// Assemble the next word through unsigned char accesses; this is
		// valid for any alignment of 'data'.
		unsigned int k = 0;
		unsigned char * k_bytes = (unsigned char *)&k;
		int i;

		for(i = 0; i < 4; i++)
		{
			k_bytes[i] = data[i];
		}

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	// Handle the last few bytes of the input array

	switch(len)
	{
	case 3:
		h ^= (unsigned int)data[2] << 16;
		/* fallthrough */
	case 2:
		h ^= (unsigned int)data[1] << 8;
		/* fallthrough */
	case 1:
		h ^= data[0];
		h *= m;
		break;
	default:
		// No trailing bytes left to mix in.
		break;
	}

	// Do a few final mixes of the hash to ensure the last few
	// bytes are well-incorporated.

	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}

File c_src/murmur.h

View file
  • Ignore whitespace
#ifndef MURMUR_H
#define MURMUR_H

/*
 * MurmurHash2, by Austin Appleby.
 *
 * key  - first byte of the data to hash; it is only read.
 * len  - number of bytes to hash.
 * seed - starting value for the hash state.
 *
 * Returns the hash as an unsigned int.
 */
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed );

#endif /* MURMUR_H */

File src/efnv.erl

View file
  • Ignore whitespace
 -module(efnv).
 -author('cliff@powerset.com').
 -author('andy@basho.com').
--export([fnv/1, fnv/2]).
+-export([fnv/1, fnv/2, murmur/1, murmur/2]).
 -on_load(init/0).
 
 -define(SEED, 2166136261).
 fnv(T) -> fnv(T, ?SEED).
 fnv(T, Seed) when is_binary(T) -> efnv_hash(T, Seed);
 fnv(T, Seed) -> efnv_hash(term_to_binary(T), Seed).
-efnv_hash(_T, _Seed) -> "NIF library not loaded".
+efnv_hash(_T, _Seed) -> "NIF library not loaded".    
%% @doc Hash Term with murmur_hash/2, seeded with the module default ?SEED.
murmur(Term) ->
    murmur(Term, ?SEED).

%% @doc Hash Term with murmur_hash/2 using an explicit Seed.
%% A binary is hashed as-is; any other term is first serialized with
%% term_to_binary/1.
murmur(Bin, Seed) when is_binary(Bin) ->
    murmur_hash(Bin, Seed);
murmur(Term, Seed) ->
    murmur_hash(term_to_binary(Term), Seed).

%% Erlang-side body of murmur_hash/2; it returns this string when called.
%% NOTE(review): presumably replaced by the C NIF of the same name once
%% init/0 has loaded the library - confirm.
murmur_hash(_Bin, _Seed) ->
    "NIF library not loaded".
+