Support Custom Hashing function TLongIntHashMap

Issue #17 new
Euncheon Lim created an issue

Hi all. I recently started using Trove for building large hash table. I found that the performance of current hash function(HashFunctions.hash(long val)) does not have a stable performance with my data(having more than 100M keys). I would like to use custom hashing function for this primitive maps. However, insertKeyAt() and insertKeyRehash() methods are not visible. Therefore, I had to implement oddly. Could you tell me if there is an alternative way?

package de.mpg.tuebingen.euncheonlim;

import gnu.trove.map.hash.TLongIntHashMap;

public class CustomHashTLongIntHashMap extends TLongIntHashMap {
    public CustomHashTLongIntHashMap(int initialCapacity) {
        super(initialCapacity);
    }

    @Override
    protected int insertKey(long val) {
        int hash, index;
        long h = val;
        h = customHashing();
        hash = (int) h & 0x7fffffff;
        index = hash % _states.length;
        byte state = _states[index];

        consumeFreeSlot = false;

        if (state == FREE) {
            consumeFreeSlot = true;
            customInsertKeyAt(index, val);

            return index;       // empty, all done
        }

        if (state == FULL && _set[index] == val) {
            return -index - 1;   // already stored
        }

        // already FULL or REMOVED, must probe
        return customInsertKeyRehash(val, index, hash, state);
    }

    protected int customInsertKeyRehash(long val, int index, int hash, byte state) {
        // compute the double hash
        final int length = _set.length;
        int probe = 1 + (hash % (length - 2));
        final int loopIndex = index;
        int firstRemoved = -1;

        /**
         * Look until FREE slot or we start to loop
         */
        do {
            // Identify first removed slot
            if (state == REMOVED && firstRemoved == -1)
                firstRemoved = index;

            index -= probe;
            if (index < 0) {
                index += length;
            }
            state = _states[index];

            // A FREE slot stops the search
            if (state == FREE) {
                if (firstRemoved != -1) {
                    customInsertKeyAt(firstRemoved, val);
                    return firstRemoved;
                } else {
                    consumeFreeSlot = true;
                    customInsertKeyAt(index, val);
                    return index;
                }
            }

            if (state == FULL && _set[index] == val) {
                return -index - 1;
            }

            // Detect loop
        } while (index != loopIndex);

        // We inspected all reachable slots and did not find a FREE one
        // If we found a REMOVED slot we return the first one found
        if (firstRemoved != -1) {
            customInsertKeyAt(firstRemoved, val);
            return firstRemoved;
        }

        // Can a resizing strategy be found that resizes the set?
        throw new IllegalStateException("No free or removed slots available. Key set full?!!");
    }

    protected void customInsertKeyAt(int index, long val) {
            _set[index] = val;  // insert value
            _states[index] = FULL;
    }
}

Comments (2)

  1. Rob Eden

    There probably isn't a better way right now. I suspect the reason the methods are restricted like that is the compiler can inline them if it knows they can't be overridden. I'd have to dig into it to know for certain though.

  2. Log in to comment