bitsquid / foundation / hash.h

#pragma once

#include "array.h"
#include "collection_types.h"

/// The hash function stores its data in a "list-in-an-array" where
/// indices are used instead of pointers. A free list is used to keep
/// track of free data entries.

namespace hash
{
	template<typename T> void set(Hash<T> &h, uint64_t key, const T &value);
}

namespace hash_internal
{
	const uint32_t END_OF_LIST = 0xffffffffu;
	static const uint64_t UNUSED_KEY = 0xffffffffeeeeeeeeull;

	struct FindResult
	{
		uint32_t hash_i;
		uint32_t data_prev;
		uint32_t data_i;
	};	

	template<typename T> uint32_t add_entry(Hash<T> &h, uint64_t key)
	{
		typename Hash<T>::Entry e;
		e.key = key;
		e.next = END_OF_LIST;
		if (h._freelist == END_OF_LIST) {
			uint32_t ei = array::size(h._data);
			array::push_back(h._data, e);
			return ei;
		}
		uint32_t ei = h._freelist;
		h._freelist = h._data[h._freelist].next;
		h._data[ei] = e;
		return ei;
	}

	template<typename T> void erase(Hash<T> &h, const FindResult &fr)
	{
		if (fr.data_prev == END_OF_LIST)
			h._hash[fr.hash_i] = h._data[fr.data_i].next;
		else
			h._data[fr.data_prev].next = h._data[fr.data_i].next;
		h._data[fr.data_i].key = UNUSED_KEY;
		h._data[fr.data_i].next = h._freelist;
		h._freelist = fr.data_i;
	}

	template<typename T> FindResult find(const Hash<T> &h, uint64_t key)
	{
		FindResult fr;
		fr.hash_i = END_OF_LIST;
		fr.data_prev = END_OF_LIST;
		fr.data_i = END_OF_LIST;

		if (array::size(h._hash) == 0)
			return fr;

		fr.hash_i = key % array::size(h._hash);
		fr.data_i = h._hash[fr.hash_i];
		while (fr.data_i != END_OF_LIST) {
			if (h._data[fr.data_i].key == key)
				return fr;
			fr.data_prev = fr.data_i;
			fr.data_i = h._data[fr.data_i].next;
		}
		return fr;
	}

	template<typename T> uint32_t find_or_fail(const Hash<T> &h, uint64_t key)
	{
		return find(h, key).data_i;
	}

	template<typename T> uint32_t find_or_make(Hash<T> &h, uint64_t key)
	{
		FindResult fr = find(h, key);
		if (fr.data_i != END_OF_LIST)
			return fr.data_i;

		fr.data_i = add_entry(h, key);
		if (fr.data_prev == END_OF_LIST)
			h._hash[fr.hash_i] = fr.data_i;
		else
			h._data[fr.data_prev].next = fr.data_i;
		return fr.data_i;
	}	

	template<typename T> void find_and_erase(Hash<T> &h, uint64_t key)
	{
		const FindResult fr = find(h, key);
		if (fr.data_i != END_OF_LIST)
			erase(h, fr);
	}

	template<typename T> void rehash(Hash<T> &h, uint32_t new_size)
	{
		Hash<T> nh(*h._hash._allocator);
		array::resize(nh._hash, new_size);
		array::reserve(nh._data, array::size(h._data));
		for (uint32_t i=0; i<new_size; ++i)
			nh._hash[i] = END_OF_LIST;
		for (uint32_t i=0; i<array::size(h._data); ++i) {
			const typename Hash<T>::Entry &e = h._data[i];
			if (h._data[i].key == UNUSED_KEY)
				continue;
			hash::set(nh, e.key, e.value);
		}

		Hash<T> empty(*h._hash._allocator);
		h.~Hash<T>();
		memcpy(&h, &nh, sizeof(Hash<T>));
		memcpy(&nh, &empty, sizeof(Hash<T>));
	}

	template<typename T> bool full(const Hash<T> &h)
	{
		const float max_load_factor = 0.7f;
		return array::size(h._data) >= array::size(h._hash) * max_load_factor;
	}

	template<typename T> void grow(Hash<T> &h)
	{
		uint32_t new_size = array::size(h._data) * 2 + 10;
		rehash(h, new_size);
	}
}

namespace hash
{
	const uint32_t END_OF_LIST = 0xffffffffu;

	template<typename T> bool has(const Hash<T> &h, uint64_t key)
	{
		return hash_internal::find_or_fail(h, key) != END_OF_LIST;
	}

	template<typename T> const T &get(const Hash<T> &h, uint64_t key, const T &deffault)
	{
		uint32_t i = hash_internal::find_or_fail(h, key);
		return i == END_OF_LIST ? deffault : h._data[i].value;
	}

	template<typename T> void set(Hash<T> &h, uint64_t key, const T &value)
	{
		if (array::size(h._hash) == 0)
			hash_internal::grow(h);

		uint32_t i = hash_internal::find_or_make(h, key);
		h._data[i].value = value;
		if (hash_internal::full(h))
			hash_internal::grow(h);
	}

	template<typename T> void remove(Hash<T> &h, uint64_t key)
	{
		hash_internal::find_and_erase(h, key);
	}

	template<typename T> void reserve(Hash<T> &h, uint32_t size)
	{
		hash_internal::rehash(h, size);
	}
}

template <typename T> Hash<T>::Hash(Allocator &a) :
	_freelist(END_OF_LIST), _hash(a), _data(a)
{}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.