Source

nrnr / nrnr.c

Full commit
#include <string.h>
#include <alloca.h>
#include <stdio.h> // temp
#include "nrnr.h"


// start is inclusive, end is exclusive
struct range {
    index_t start;
    index_t end;
};



static void SortIndices(index_t* array, index_t numPoints, const float* points, size_t axis, size_t dimensions);
static struct range BisectIndices(const index_t* array, const float* points, index_t numPoints, size_t dimensions, float minVal, float maxVal, size_t axis);


#include <sys/time.h>
#include <time.h>
static float GetTime(void)
{
	static struct timeval timeval, first_timeval;
	gettimeofday(&timeval, 0);
	if(first_timeval.tv_sec == 0) {
		first_timeval = timeval;
		return 0;
	}
	unsigned int s = (timeval.tv_sec - first_timeval.tv_sec) * 1000 + (timeval.tv_usec - first_timeval.tv_usec) / 1000.0;
    return s / 1000.0f;
}



struct NrNrHood* NrNrCreateNeighborhood(size_t numPoints, size_t dimensions, const float* points) {
    // All data is allocated in a single block. Allocate and then
    // aim some pointers into the space.

    size_t allocSize = sizeof(struct NrNrHood) + sizeof(index_t) * numPoints * 6;
    struct NrNrHood* hood = malloc(allocSize);
    hood->numPoints = numPoints;
    hood->dimensions = dimensions;
    hood->points = points;
    hood->chunks = (numPoints + NRNRCHUNK - 1) / NRNRCHUNK;
    hood->indices = (index_t*)(hood + 1);
    hood->nextIndices = hood->indices + numPoints * 3;

    // Allocate temporary buffer for storing the inverse sorted array
    // Find inverse mappifng to sort index (point to index)
    // This goes into a temporary set of inverse indices
    size_t inverseSize = NRNRCHUNK;
    if (inverseSize > numPoints) {
        inverseSize = numPoints;
    }
    index_t* inverse = (index_t*)malloc(sizeof(index_t) * inverseSize);

    // Break the data into small chunks
    index_t* chunkIndices = hood->indices;
    index_t* chunkNextIndices = hood->nextIndices;
    const float* chunkPoints = points;
    size_t indexStep = hood->dimensions * NRNRCHUNK;
    for (size_t c = 0; c < hood->chunks; ++c,
                chunkIndices += indexStep, chunkNextIndices += indexStep, chunkPoints += indexStep) {
        index_t chunkSize = numPoints - (c * NRNRCHUNK);
        if (chunkSize > NRNRCHUNK) {
            chunkSize = NRNRCHUNK;
        }

        // Initialize indices with list of all points
        for (index_t i = 0; i < chunkSize; ++i) {
            chunkIndices[i] = i;
        }
//printf("presorted 0:"); for (size_t z=0; z < chunkSize; ++z) printf(" %zu", chunkIndices[z]); printf("\n");

        // Copy ordered point numbers from dimension 0 into all others
        for (size_t d = 1; d < dimensions; ++d) {
            index_t* initSrc = chunkIndices + (d - 1) * chunkSize;
            index_t* initDst = chunkIndices + d * chunkSize;
            memcpy(initDst, initSrc, sizeof(index_t) * chunkSize);
//printf("presorted %zu:", d); for (size_t z=0; z < chunkSize; ++z) printf(" %zu", initDst[z]); printf("\n");
        }

        // Sort indices for each dimension
        for (size_t d = 0; d < dimensions; ++d) {
            index_t* array = chunkIndices + d * chunkSize;
            SortIndices(array, chunkSize, chunkPoints, d, dimensions);
//printf("   sorted %zu:", d); for (size_t z=0; z < chunkSize; ++z) printf("  %zu(%.1f)", array[z], chunkPoints[array[z] * dimensions + d]); printf("\n");
        }

        // Build next arrays that map a point from one dimension to the next
        // Allows jumping from dim0 index N to dim1, and then to dim2, ...
        for (size_t d = 0; d < dimensions; ++d) {

            // Build an index of the 'inverse' mapping from point number to sorted index
            index_t* ind = chunkIndices + chunkSize * d;

            for (index_t i = 0; i < chunkSize; ++i) {
                inverse[ind[i]] = i;
            }
//printf("     inverse %zu:", d); for (size_t z=0; z < chunkSize; ++z) printf(" %zu", inverse[z]); printf("\n");


            // Fill out the next array, move 'ind' to the array coming from
            index_t* next = chunkNextIndices + chunkSize * d;
            if (d) {
                ind = chunkIndices + chunkSize * (d - 1);
            } else {
                ind = chunkIndices + chunkSize * (dimensions - 1);
            }
            for (index_t i = 0; i < chunkSize; ++i) {
                next[i] = inverse[ind[i]];
            }
//printf("        next %zu:", d); for (size_t z=0; z < chunkSize; ++z) printf(" %zu", next[z]); printf("\n");
        }

    }

    free(inverse);

    return hood;
}


struct NrNrHood* NrNrFreeNeighborhood(struct NrNrHood* hood) {
    if (hood) {
        free(hood);
    }
    return NULL;
}



//size_t NrNrCountBlock(struct NrNrHood* hood, size_t most, float* minimum, float* maximum) {
//}


size_t NrNrSearchBlock(const struct NrNrHood* hood, size_t** results, const float* minimum, const float* maximum) {
    struct range* ranges = alloca(sizeof(ranges) * hood->dimensions);

    size_t* workAxis = alloca(sizeof(size_t) * hood->dimensions);;
    index_t** workNexts = alloca(sizeof(index_t*) * hood->dimensions);
    workAxis[0] = hood->dimensions; // unused (will crash if used)
    workNexts[0] = NULL; // unused (will crash if used)

    size_t* common = (size_t*)malloc(sizeof(size_t) * hood->numPoints);
    size_t* found = common;

    // Break the data into small chunks
    index_t* chunkIndices = hood->indices;
    index_t* chunkNextIndices = hood->nextIndices;
    const float* chunkPoints = hood->points;
    size_t chunkOffset = 0;
    size_t indexStep = hood->dimensions * NRNRCHUNK;
    for (size_t c = 0; c < hood->chunks; ++c,
                chunkIndices += indexStep, chunkNextIndices += indexStep, chunkPoints += indexStep, chunkOffset += NRNRCHUNK) {
        index_t chunkSize = hood->numPoints - (c * NRNRCHUNK);
        if (chunkSize > NRNRCHUNK) {
            chunkSize = NRNRCHUNK;
        }
//printf("Search Chunk %zu  size=%zu\n", c, chunkSize);

        // Find ranges of indices for each dimension

        for (size_t d = 0; d < hood->dimensions; ++d) {
            index_t* array = chunkIndices + chunkSize * d;
            ranges[d] = BisectIndices(array, chunkPoints, chunkSize, hood->dimensions, minimum[d], maximum[d], d);
//printf("  bisect %zu: %zu-%zu, ", d, ranges[d].start, ranges[d].end);
//for (size_t z=ranges[d].start; z<ranges[d].end; ++z) printf("  %zu(%.1f)", array[z] + chunkOffset, chunkPoints[array[z] * hood->dimensions + d]); printf("\n");
            if (ranges[d].start == ranges[d].end) {
                continue;
            }
        }

        // Find dimension with the least number of matches

        size_t minAxis = 0;
        index_t minSize = ranges[0].end - ranges[0].start;
        for (size_t d = 0; d < hood->dimensions; ++d) {
            index_t size = ranges[d].end - ranges[d].start;
            if (size < minSize) {
                minSize = size;
                minAxis = d;
            }
        }

        // To eliminate as much code as possible fro the search loop, precompute several needed values

        for (size_t d = 1; d < hood->dimensions; ++d) {
            workAxis[d] = minAxis + d;
            if (workAxis[d] >= hood->dimensions) {
                workAxis[d] -= hood->dimensions;
            }
            workNexts[d] = chunkNextIndices + chunkSize * workAxis[d];
        }

        // The main search loop. All searching time is spent here.
        index_t* minIndices = chunkIndices + chunkSize * minAxis;

        for (index_t minStep = ranges[minAxis].start; minStep < ranges[minAxis].end; ++minStep) {
            index_t workStep = minStep;
            int match = 1;
            for (size_t d = 1; d < hood->dimensions; ++d) {
                index_t pt = workNexts[d][workStep];
                if (!(match = (pt >= ranges[workAxis[d]].start && pt <= ranges[workAxis[d]].end)))
                    break;
                workStep = pt;
            }
            if (match) {
                *found++ = minIndices[minStep] + chunkOffset;
            }
        }
    }

    // Handle return values
    size_t totalFound = found - common;
    if (!totalFound) {
        free(common);
        *results = NULL;
        return 0;
    }
    // realloc common to the smaller, found size
    if (totalFound < hood->numPoints) {
        common = realloc(common, sizeof(size_t) * totalFound);
    }

    *results = common;
    return totalFound;
}



size_t* NrNrFreeResults(size_t* results) {
    if (!results) {
        free(results);
    }
    return NULL;
}



void SortIndices(index_t* array, index_t numPoints, const float* points, size_t axis, size_t dimensions) {
    if (numPoints < 2) {
        return;
    }

    // Insertion sort for small ranges
    if (numPoints < 16) {
        for (index_t i = 1; i < numPoints; ++i) {
            index_t point = array[i];
            float position = points[point * dimensions + axis];
            index_t j;
            for (j = i; j > 0 && points[array[j - 1] * dimensions + axis] > position; --j) {
                array[j] = array[j - 1];
            }
            array[j] = point;
        }
        return;
    }

    // Quicksort (recursive) for most spans
    float position = points[array[numPoints / 2] * dimensions + axis];
    index_t *left = array;
    index_t *right = array + numPoints - 1;
    while (left <= right) {
        while (points[(*left) * dimensions + axis] < position)
            left++;
        while (points[(*right) * dimensions + axis] > position)
            right--;
        if (left <= right) {
            index_t t = *left;
            *left++ = *right;
            *right-- = t;
        }
    }

    SortIndices(array, right - array + 1, points, axis, dimensions);
    SortIndices(left, array + numPoints - left, points, axis, dimensions);
}



struct range BisectIndices(const index_t* array, const float* points, index_t numPoints, size_t dimensions, float minVal, float maxVal, size_t axis) {
    struct range range;
    const index_t *fullStart, *fullEnd;
    const index_t *start;
    const index_t *end;

    fullStart = start = array;
    fullEnd = end = fullStart + numPoints;
#if 0
    float startValue = points[array[0] * dimensions + axis];
    if (startValue > maxVal) {
        range.start = range.end = numPoints;
        return range;
    }
#endif

    float finalValue = points[array[numPoints - 1] * dimensions + axis];
#if 1
    if (finalValue < minVal) {
        range.start = range.end = 0;
        return range;
    }
#endif

    while (start < end) {
        index_t dist = end - start;
        const index_t* half = start + (dist / 2);
        float halfValue = points[*half * dimensions + axis];
        if (minVal <= halfValue) {
            end = half;
#if 0
            if (finalValue < halfValue) {
                fullEnd = half;
                finalValue = points[*fullEnd * dimensions + axis];
            }
#endif
        } else {
            start = half + 1;
        }
    }
    range.start = start - fullStart;

    start = end;
    end = fullEnd;
    while (start < end) {
        index_t dist = end - start;
        const index_t* half = start + (dist / 2);
        float halfValue = points[*half * dimensions + axis];
        if (maxVal >= halfValue) {
            start = half + 1;
        } else {
            end = half;
        }
    }
    range.end = start - fullStart;

    return range;
}