Rio Yokota committed ed7b55e

Revived cmake.

Comments (0)

Files changed (4)

 ADD_DEFINITIONS(-D${EQUATION})
 ADD_DEFINITIONS(-D${BASIS})
 ADD_DEFINITIONS(-D${DEVICE})
-ADD_SUBDIRECTORY(kernel)
+ADD_SUBDIRECTORY(kernels)
 
 # Unit tests
 INCLUDE(CTest)
-ADD_SUBDIRECTORY(unit_test)
+ADD_SUBDIRECTORY(examples)
 #DEVICE	= MIC
 
 ### GCC compiler
-CXX	= mpicxx -ggdb3 -Wall -Wextra -Wshadow -Wuninitialized -O3 -mavx -ffast-math -funroll-loops -fforce-addr -fbounds-check
+CXX	= mpicxx -ggdb3 -Wall -Wextra -Wshadow -Wuninitialized -O3 -msse4.2 -ffast-math -funroll-loops -fforce-addr -fbounds-check
 ### Intel compiler
 #CXX	= mpicxx -Wall -xHOST -O3 -funroll-loops -finline-functions -ansi-alias
 ### BG/P compiler

kernels/CMakeLists.txt

+IF(USE_GPU)
+  CUDA_ADD_LIBRARY(Kernels ${EQUATION}${BASIS}${DEVICE}.cu)
+  TARGET_LINK_LIBRARIES(Kernels ${CUDA_LIBRARIES})
+ELSE()
+  ADD_LIBRARY(Kernels ${EQUATION}${BASIS}${DEVICE}.cxx)
+ENDIF()

kernels/LaplaceCartesianCPU.cxx

     x2 = _mm256_set1_ps(Bj[1].X[0]);                           
     y2 = _mm256_set1_ps(Bj[1].X[1]);
     z2 = _mm256_set1_ps(Bj[1].X[2]);                           
-    for( int j=0; j<nj; j++ ) {
+    for (int j=0; j<nj; j++) {
       __m256 invR = _mm256_rsqrt_ps(R2);
       __m256 mask = _mm256_cmp_ps(R2, _mm256_setzero_ps(), _CMP_GT_OQ);
       invR = _mm256_and_ps(invR, mask);
       R2 = _mm256_add_ps(R2, z2);
       z2 = _mm256_set1_ps(Bj[j+2].X[2]);
     }
-    for( int k=0; k<8; k++ ) {
+    for (int k=0; k<8; k++) {
       Bi[i+k].TRG[0] += ((float*)&pot)[k];
       Bi[i+k].TRG[1] += ((float*)&ax)[k];
       Bi[i+k].TRG[2] += ((float*)&ay)[k];
     x2 = _mm256_set1_ps(B[i+2].X[0]);
     y2 = _mm256_set1_ps(B[i+2].X[1]);
     z2 = _mm256_set1_ps(B[i+2].X[2]);
-    for( int j=i+1; j<n; j++ ) {
+    for (int j=i+1; j<n; j++) {
       __m256 invR = _mm256_rsqrt_ps(R2);
       __m256 mask = _mm256_cmp_ps(_mm256_setr_ps(i, i+1, i+2, i+3, i+4, i+5, i+6, i+7),
         _mm256_set1_ps(j), _CMP_LT_OQ);
       R2 = _mm256_add_ps(R2, z2);
       z2 = _mm256_set1_ps(B[j+2].X[2]);
     }
-    for( int k=0; k<8; k++ ) {
+    for (int k=0; k<8; k++) {
       B[i+k].TRG[0] += ((float*)&pot)[k];
       B[i+k].TRG[1] += ((float*)&ax)[k];
       B[i+k].TRG[2] += ((float*)&ay)[k];
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.