Matthew Turk avatar Matthew Turk committed 83c602e

Speed up the kernel computation considerably by storing distance.

This also includes a different calculation of the half_len, but commented out.
I need to think carefully what the correct half_len is.

Comments (0)

Files changed (1)

yt/frontends/sph/smoothing_kernel.pyx

 from fp_utils cimport iclip
 cimport cython
 
-#@cython.boundscheck(False)
-#@cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 @cython.cdivision(True)
 def smooth_particles(
         input_fields, output_grids,
 
     cdef np.float64_t *kernel_sum = \
         <np.float64_t *>malloc(ngas * sizeof(np.float64_t))
+    cdef np.float64_t *pdist = \
+        <np.float64_t *>malloc(dims[0]*dims[1]*dims[2]*
+                               sizeof(np.float64_t))
 
     nf = len(input_fields)
     assert(nf == len(output_grids))
             half_len = <int>(hsml[p]/dds[i]) + 1
             ib0[i] = ind[i] - half_len
             ib1[i] = ind[i] + half_len
+            #pos[i] = ppos[p, i] - left_edge[i]
+            #ind[i] = <int>(pos[i] / dds[i])
+            #ib0[i] = <int>((pos[i] - hsml[i]) / dds[i]) - 1
+            #ib1[i] = <int>((pos[i] + hsml[i]) / dds[i]) + 1
             if ib0[i] >= dims[i] or ib1[i] < 0:
                 skip = 1
             ib0[i] = iclip(ib0[i], 0, dims[i] - 1)
                     idist[2] = (ind[2] - k) * (ind[2] - k) * sdds[2]
                     dist = idist[0] + idist[1] + idist[2]
                     dist = sqrt(dist) / hsml[p]
-                    kernel_sum[p] += sph_kernel(dist)
+                    gi = ((i * dims[1] + j) * dims[2]) + k
+                    pdist[gi] = sph_kernel(dist)
+                    kernel_sum[p] += pdist[gi]
         for i from ib0[0] <= i <= ib1[0]:
-            idist[0] = (ind[0] - i) * (ind[0] - i) * sdds[0]
             for j from ib0[1] <= j <= ib1[1]:
-                idist[1] = (ind[1] - j) * (ind[1] - j) * sdds[1] 
                 for k from ib0[2] <= k <= ib1[2]:
-                    idist[2] = (ind[2] - k) * (ind[2] - k) * sdds[2]
-                    dist = idist[0] + idist[1] + idist[2]
-                    dist = sqrt(dist) / hsml[p]
-                    kern = sph_kernel(dist)
                     gi = ((i * dims[1] + j) * dims[2]) + k
+                    dist = pdist[gi] / kernel_sum[p]
                     for d in range(nf):
-                        gdata[d][gi] += pdata[d][p] * kern / kernel_sum[p]
+                        gdata[d][gi] += pdata[d][p] * dist
     free(kernel_sum)
+    free(pdist)
     free(gdata)
     free(pdata)
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.