Commits

Peter Shinners  committed 1a75ba3

Minimize code and branches in the search loop for 20% speedup.

  • Participants
  • Parent commits 2cfda6f

Comments (0)

Files changed (2)

 
     // Find values common to each dimension
 
-    size_t* common = (size_t*)malloc(sizeof(size_t) * (minSize + hood->dimensions + hood->dimensions));
+    size_t* common = (size_t*)malloc(sizeof(size_t) * (minSize + hood->dimensions) + sizeof(size_t*) * hood->dimensions);
     size_t* found = common;
 
     // To eliminate as much code as possible fro the search loop, precompute several needed values
 
     size_t* workAxis = common + minSize;
-    size_t** workIndices = (size_t**)workAxis + hood->dimensions;
-    workIndices[0] = 0; // unused
+    size_t** workNexts = (size_t**)workAxis + hood->dimensions;
+    workNexts[0] = 0; // unused
     workAxis[0] = minAxis; // unused
     for (size_t d = 1; d < hood->dimensions; ++d) {
         workAxis[d] = minAxis + d;
         if (workAxis[d] >= hood->dimensions) {
             workAxis[d] -= hood->dimensions;
         }
-        workIndices[d] = hood->nextIndices + hood->numPoints * workAxis[d];
+        workNexts[d] = hood->nextIndices + hood->numPoints * workAxis[d];
     }
 
     // The main search loop. All searching time is spent here.
+    size_t* minIndices = hood->indices + hood->numPoints * minAxis;
 
     float start = GetTime();
     for (size_t minStep = ranges[minAxis].start; minStep < ranges[minAxis].end; ++minStep) {
         size_t workStep = minStep;
         int match = 1;
-        for (size_t d = 1; match && d < hood->dimensions; ++d) {
-            size_t pt = workIndices[d][workStep];
-            match = (pt >= ranges[workAxis[d]].start && pt <= ranges[workAxis[d]].end);
+        for (size_t d = 1; d < hood->dimensions; ++d) {
+            size_t pt = workNexts[d][workStep];
+            if (!(match = (pt >= ranges[workAxis[d]].start && pt <= ranges[workAxis[d]].end)))
+                break;
             workStep = pt;
         }
         if (match) {
-            *found = hood->indices[hood->numPoints * minAxis + minStep];
-            ++found;
+            *found++ = minIndices[minStep];
         }
     }
     printf("  SCAN: %.3f sec\n", GetTime() - start);

File perfnotes.txt

 On the creation of the neighborhood. Reduce the 'inverse' to a single array instead
 of one per dimension. This shortens create time from 16.5s to 12.0s. Reduces memory
 by 76mb
-
-Create neighborhood: 12.069 sec
-Search block 639983: 0.116 sec
-Cleanup: 0.011 sec
-2658256maxresident
+    Create neighborhood: 12.069 sec
+    Search block 639983: 0.116 sec
+    Cleanup: 0.011 sec
+    2658256maxresident
 
 
 
 in the rectangular search compared to the spherical search of kdtree and nanoflann
 
 
+
+Minimize the code inside the search loop. Gives about 20% performance.
+Not digging in the right place, but gets the 40radius test to .1 second.
+    Create neighborhood: 12.076 sec
+    Search block 639983: 0.09 sec
+    Cleanup: 0.010 sec
+    2657216maxresident