Rio Yokota avatar Rio Yokota committed 5be1681

Still debugging GPU wrappers.

Comments (0)

Files changed (5)

kernel/GPUEvaluator.cxx

 
 template<Equation equation>
 void Evaluator<equation>::evalP2P(Bodies &ibodies, Bodies &jbodies, bool onCPU) {// Evaluate all P2P kernels
+  startTimer("evalP2P");                                        // Start timer
   int numIcall = int(ibodies.size()-1)/MAXBODY+1;               // Number of icall loops
   int numJcall = int(jbodies.size()-1)/MAXBODY+1;               // Number of jcall loops
   int ioffset = 0;                                              // Initialzie offset for icall loops
     }                                                           // End loop over jcall
     ioffset += MAXBODY;                                         // Increment icall offset
   }                                                             // End loop over icall
+  stopTimer("evalP2P");                                         // Stop timer
 }
 
 template<Equation equation>
 void Evaluator<equation>::evalP2M(Cells &cells) {               // Evaluate all P2M kernels
+  startTimer("evalP2M");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator for target
   const int numCell = MAXCELL/NCRIT/4;                          // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
     clearBuffers();                                             //  Clear GPU buffers
     ioffset += numCell;                                         //  Increment ioffset
   }                                                             // End loop over icall
+  stopTimer("evalP2M");                                         // Stop timer
 }
 
 template<Equation equation>
 void Evaluator<equation>::evalM2M(Cells &cells, Cells &jcells) {// Evaluate all M2M kernels
+  startTimer("evalM2M");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator for target
   Cj0 = jcells.begin();                                         // Set begin iterator for source
   const int numCell = MAXCELL / NTERM / 2;                      // Number of cells per icall
     }                                                           //  End loop over icall
     level--;                                                    //  Decrement level
   }                                                             // End while loop over levels
+  stopTimer("evalM2M");                                         // Stop timer
 }
 
 template<Equation equation>
 void Evaluator<equation>::evalM2L(C_iter Ci, C_iter Cj) {       // Queue single M2L kernel
-  if( Ci-Ci0 == 579 && Cj-Cj0 == 512 ) std::cout << listM2L[579].size() << std::endl;
   listM2L[Ci-Ci0].push_back(Cj);                                // Push source cell into M2L interaction list
   flagM2L[Ci-Ci0][Cj] |= Iperiodic;                             // Flip bit of periodic image flag
   NM2L++;                                                       // Count M2L kernel execution
 
 template<Equation equation>
 void Evaluator<equation>::evalM2L(Cells &cells) {               // Evaluate queued M2L kernels
+  startTimer("evalM2L");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
   const int numCell = MAXCELL / NTERM / 2;                      // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
   }                                                             // End loop over icall
   listM2L.clear();                                              // Clear interaction lists
   flagM2L.clear();                                              // Clear periodic image flags
+  stopTimer("evalM2L");                                         // Stop timer
 }
 
 template<Equation equation>
 
 template<Equation equation>
 void Evaluator<equation>::evalM2P(Cells &cells) {               // Evaluate queued M2P kernels
+  startTimer("evalM2P");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator for target
   const int numCell = MAXCELL/NCRIT/4;                          // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
   }                                                             // End loop over icall
   listM2P.clear();                                              // Clear interaction lists
   flagM2P.clear();                                              // Clear periodic image flags
+  stopTimer("evalM2P");                                         // Stop timer
 }
 
 template<Equation equation>
 
 template<Equation equation>
 void Evaluator<equation>::evalP2P(Cells &cells) {               // Evaluate queued P2P kernels
+  startTimer("evalP2P");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
   const int numCell = MAXCELL/NCRIT/4;                          // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
   }                                                             // End loop over icall
   listP2P.clear();                                              // Clear interaction lists
   flagP2P.clear();                                              // Clear periodic image flags
+  stopTimer("evalP2P");                                         // Stop timer
 }
 
 template<Equation equation>
 void Evaluator<equation>::evalL2L(Cells &cells) {               // Evaluate all L2L kenrels
+  startTimer("evalL2L");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
   const int numCell = MAXCELL / NTERM / 2;                      // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
     }                                                           //  End loop over icall
     level++;                                                    //  Increment level
   }                                                             // End while loop over levels
+  stopTimer("evalL2L");                                         // Stop timer
 }
 
 template<Equation equation>
 void Evaluator<equation>::evalL2P(Cells &cells) {               // Evaluate all L2P kernels
+  startTimer("evalL2P");                                        // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
   const int numCell = MAXCELL/NCRIT/4;                          // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
     clearBuffers();                                             //  Clear GPU buffers
     ioffset += numCell;                                         //  Increment ioffset
   }                                                             // End loop over icall
+  stopTimer("evalL2P");                                         // Stop timer
 }
 
 template<Equation equation>
 
 template<Equation equation>
 void Evaluator<equation>::evalEwaldReal(Cells &cells) {         // Evaluate queued Ewald real kernels
+  startTimer("evalEwaldReal");                                  // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
   const int numCell = MAXCELL/NCRIT/4;                          // Number of cells per icall
   int numIcall = int(cells.size()-1)/numCell+1;                 // Number of icall loops
   }                                                             // End loop over icall
   listP2P.clear();                                              // Clear interaction lists
   flagP2P.clear();                                              // Clear periodic image flags
+  stopTimer("evalEwaldReal");                                   // Stop timer
 }
 
 template<Equation equation>

kernel/GPUVanDerWaals.cu

 #define KERNEL
 #include "kernel.h"
 #undef KERNEL
-__device__ __constant__ gpureal constDevc[514];                 // Constants on device
+const int maxConst = 2 + 2 * 32 * 32;
+__device__ __constant__ gpureal constDevc[maxConst];            // Constants on device
 
 template<>
 void Kernel<VanDerWaals>::initialize() {
     constHost.push_back(RSCALE[i]);
     constHost.push_back(GSCALE[i]);
   }
-  assert( constHost.size() == 514 );
+  assert( constHost.size() < maxConst );
   cudaMemcpy(keysDevc,  &keysHost[0],  keysHost.size()*sizeof(int),cudaMemcpyHostToDevice);
   cudaMemcpy(rangeDevc, &rangeHost[0], rangeHost.size()*sizeof(int),cudaMemcpyHostToDevice);
   cudaMemcpy(sourceDevc,&sourceHost[0],sourceHost.size()*sizeof(gpureal),cudaMemcpyHostToDevice);

wrapper/test_parallel_coulombVdW.cxx

 #include "md.h"
 #include "vtgrapeproto.h"
 }
+//#define DATAFILE
 
 extern "C" void FMMcalccoulomb_ij(int ni, double* xi, double* qi, double* fi,
   int nj, double* xj, double* qj, double rscale, int tblno, double size, int periodicflag);
   int mpisize, mpirank;
   MPI_Comm_size(MPI_COMM_WORLD,&mpisize);
   MPI_Comm_rank(MPI_COMM_WORLD,&mpirank);
-//  const int N = 78537;
-//  const int nat = 22;
-//  const double size = 90;
+#ifdef DATAFILE
+  const int N = 78537;
+  const int nat = 22;
+  const double size = 90;
+#else
   const int N = 10000;
   const int nat = 16;
   const double size = 2;
+#endif
   double *xi     = new double [3*N];
   double *qi     = new double [N];
   double *pi     = new double [3*N];
   int *natex  = new int [N];
   MR3init();
 
-#if 1
+#ifdef DATAFILE
+  std::ifstream fid("datafile",std::ios::in);
+  for( int i=0; i<nat*nat; i++ ) {
+    fid >> rscale[i] >> gscale[i] >> frscale[i] >> fgscale[i];
+  }
+  int ic;
+  for( int i=0; i<N; i++ ) {
+    fid >> ic >> atypei[i] >> xi[3*i+0] >> xi[3*i+1] >> xi[3*i+2] >> qi[i];
+  }
+  fid.close();
+  for( int i=0; i<N; i++ ) {
+    xj[3*i+0] = xi[3*i+0];
+    xj[3*i+1] = xi[3*i+1];
+    xj[3*i+2] = xi[3*i+2];
+    qj[i] = qi[i];
+    atypei[i]--;
+    atypej[i] = atypei[i];
+  }
+#else
   srand48(mpirank);
   float average = 0;
   for( int i=0; i<N; i++ ) {
       fgscale[i*nat+j] = gscale[i*nat+j];
     }
   }
-#else
-  std::ifstream fid("datafile",std::ios::in);
-  for( int i=0; i<nat*nat; i++ ) {
-    fid >> rscale[i] >> gscale[i] >> frscale[i] >> fgscale[i];
-  }
-  int ic;
-  for( int i=0; i<N; i++ ) {
-    fid >> ic >> atypei[i] >> xi[3*i+0] >> xi[3*i+1] >> xi[3*i+2] >> qi[i];
-  }
-  fid.close();
-  for( int i=0; i<N; i++ ) {
-    xj[3*i+0] = xi[3*i+0];
-    xj[3*i+1] = xi[3*i+1];
-    xj[3*i+2] = xi[3*i+2];
-    qj[i] = qi[i];
-    atypei[i]--;
-    atypej[i] = atypei[i];
-  }
 #endif
   for( int i=0; i<N; i++ ) {
     pi[3*i+0] = pi[3*i+1] = pi[3*i+2] = 0;

wrapper/test_serial_coulomb.cxx

-#include <cmath>
-#include <cstdlib>
-#include <iostream>
-#include "mr3.h"
-
-const double R2MIN = 0.25;
-const double R2MAX = 64;
-
-extern "C" void FMMcalccoulomb_ij(int ni, double* xi, double* qi, double* fi,
-  int nj, double* xj, double* qj, double rscale, int tblno, double size, int periodicflag);
-
-int main() {
-  const int N = 10000;
-  const double size = 2;
-  double *xi     = new double [3*N];
-  double *qi     = new double [N];
-  double *pi     = new double [3*N];
-  double *fi     = new double [3*N];
-  double *pd     = new double [3*N];
-  double *fd     = new double [3*N];
-  double *xj     = new double [3*N];
-  double *qj     = new double [N];
-
-  srand48(0);
-  float average = 0;
-  for( int i=0; i!=N; ++i ) {
-    xi[3*i+0] = drand48() * size - size/2;
-    xi[3*i+1] = drand48() * size - size/2;
-    xi[3*i+2] = drand48() * size - size/2;
-    qi[i] = drand48()*2.0-1.0;
-    average += qi[i];
-    pi[3*i+0] = pi[3*i+1] = pi[3*i+2] = 0;
-    fi[3*i+0] = fi[3*i+1] = fi[3*i+2] = 0;
-    pd[3*i+0] = pd[3*i+1] = pd[3*i+2] = 0;
-    fd[3*i+0] = fd[3*i+1] = fd[3*i+2] = 0;
-  }
-  average /= N;
-  for( int i=0; i!=N; ++i ) {
-    qi[i] -= average;
-  }
-  average = 0;
-  for( int i=0; i!=N; ++i ) {
-    xj[3*i+0] = drand48() * size - size/2;
-    xj[3*i+1] = drand48() * size - size/2;
-    xj[3*i+2] = drand48() * size - size/2;
-    qj[i] = drand48()*2.0-1.0;
-  }
-  average /= N;
-  for( int i=0; i!=N; ++i ) {
-    qj[i] -= average;
-  }
-
-  FMMcalccoulomb_ij(N, xi, qi, pi, N, xj, qj, 0.0, 1, size, 0);
-  FMMcalccoulomb_ij(N, xi, qi, fi, N, xj, qj, 0.0, 0, size, 0);
-#if 0
-  MR3calccoulomb_ij(N, xi, qi, pd, N, xj, qj, 1.0, 1, size, 0);
-  MR3calccoulomb_ij(N, xi, qi, fd, N, xj, qj, 1.0, 0, size, 0);
-#else
-  for( int i=0; i!=N; ++i ) {
-    double P = 0, Fx = 0, Fy = 0, Fz = 0;
-    for( int j=0; j!=N; ++j ) {
-      double dx = xi[3*i+0] - xj[3*j+0];
-      double dy = xi[3*i+1] - xj[3*j+1];
-      double dz = xi[3*i+2] - xj[3*j+2];
-      double R2 = dx * dx + dy * dy + dz * dz;
-      double invR = 1 / std::sqrt(R2);
-      if( R2 == 0 ) invR = 0;
-      double invR3 = qj[j] * invR * invR * invR;
-      P += qj[j] * invR;
-      Fx += dx * invR3;
-      Fy += dy * invR3;
-      Fz += dz * invR3;
-    }
-    pd[3*i+0] += P;
-    fd[3*i+0] += Fx;
-    fd[3*i+1] += Fy;
-    fd[3*i+2] += Fz;
-  }
-#endif
-  double Pd = 0, Pn = 0, Fd = 0, Fn = 0;
-  float sums = 0;
-  for( int i=0; i!=N; ++i ) {
-    Pd += (pi[3*i+0] - pd[3*i+0]) * (pi[3*i+0] - pd[3*i+0]);
-    Pn += pd[3*i+0] * pd[3*i+0];
-    Fd += (fi[3*i+0] - fd[3*i+0]) * (fi[3*i+0] - fd[3*i+0])
-        + (fi[3*i+1] - fd[3*i+1]) * (fi[3*i+1] - fd[3*i+1])
-        + (fi[3*i+2] - fd[3*i+2]) * (fi[3*i+2] - fd[3*i+2]);
-    Fn += fd[3*i+0] * fd[3*i+0] + fd[3*i+1] * fd[3*i+1] + fd[3*i+2] * fd[3*i+2];
-    sums += qi[i]*fd[3*i+0];
-  }
-  std::cout << "Coulomb       potential : " << sqrtf(Pd/Pn) << std::endl;
-  std::cout << "Coulomb       force     : " << sqrtf(Fd/Fn) << std::endl;
-  std::cout << sums << std::endl;
-
-  delete[] xi;
-  delete[] qi;
-  delete[] pi;
-  delete[] fi;
-  delete[] pd;
-  delete[] fd;
-  delete[] xj;
-  delete[] qj;
-}

wrapper/test_serial_coulombVdW.cxx

 #include "md.h"
 #include "vtgrapeproto.h"
 }
+//#define DATAFILE
 
 extern "C" void FMMcalccoulomb_ij(int ni, double* xi, double* qi, double* fi,
   int nj, double* xj, double* qj, double rscale, int tblno, double size, int periodicflag);
   int tblno, double size, int periodicflag);
 
 int main() {
-//  const int N = 78537;
-//  const int nat = 22;
-//  const double size = 90;
+#ifdef DATAFILE
+  const int N = 78537;
+  const int nat = 22;
+  const double size = 90;
+#else
   const int N = 10000;
   const int nat = 16;
   const double size = 2;
+#endif
   double *xi     = new double [3*N];
   double *qi     = new double [N];
   double *pi     = new double [3*N];
   int *natex  = new int [N];
   MR3init();
 
-#if 1
+#ifdef DATAFILE
+  std::ifstream fid("datafile",std::ios::in);
+  for( int i=0; i<nat*nat; i++ ) {
+    fid >> rscale[i] >> gscale[i] >> frscale[i] >> fgscale[i];
+  }
+  int ic;
+  for( int i=0; i<N; i++ ) {
+    fid >> ic >> atypei[i] >> xi[3*i+0] >> xi[3*i+1] >> xi[3*i+2] >> qi[i];
+  }
+  fid.close();
+  for( int i=0; i<N; i++ ) {
+    xj[3*i+0] = xi[3*i+0];
+    xj[3*i+1] = xi[3*i+1];
+    xj[3*i+2] = xi[3*i+2];
+    qj[i] = qi[i];
+    atypei[i]--;
+    atypej[i] = atypei[i];
+  }
+#else
   srand48(0);
   float average = 0;
   for( int i=0; i<N; i++ ) {
       fgscale[i*nat+j] = gscale[i*nat+j];
     }
   }
-#else
-  std::ifstream fid("datafile",std::ios::in);
-  for( int i=0; i<nat*nat; i++ ) {
-    fid >> rscale[i] >> gscale[i] >> frscale[i] >> fgscale[i];
-  }
-  int ic;
-  for( int i=0; i<N; i++ ) {
-    fid >> ic >> atypei[i] >> xi[3*i+0] >> xi[3*i+1] >> xi[3*i+2] >> qi[i];
-  }
-  fid.close();
-  for( int i=0; i<N; i++ ) {
-    xj[3*i+0] = xi[3*i+0];
-    xj[3*i+1] = xi[3*i+1];
-    xj[3*i+2] = xi[3*i+2];
-    qj[i] = qi[i];
-    atypei[i]--;
-    atypej[i] = atypei[i];
-  }
 #endif
   for( int i=0; i<N; i++ ) {
     pi[3*i+0] = pi[3*i+1] = pi[3*i+2] = 0;
   FMMcalcvdw_ij(N,xi,atypei,pi,N,xj,atypej,nat,gscale,rscale,3,size,0);
   FMMcalcvdw_ij(N,xi,atypei,fi,N,xj,atypej,nat,fgscale,frscale,2,size,0);
 #if 1
-  MR3calcvdw_ij_host(N,xi,atypei,pd,N,xj,atypej,nat,gscale,rscale,3,size,0);
-  MR3calcvdw_ij_host(N,xi,atypei,fd,N,xj,atypej,nat,fgscale,frscale,2,size,0);
+  MR3calcvdw_ij(N,xi,atypei,pd,N,xj,atypej,nat,gscale,rscale,3,size,0);
+  MR3calcvdw_ij(N,xi,atypei,fd,N,xj,atypej,nat,fgscale,frscale,2,size,0);
 #else
   for( int i=0; i<N; i++ ) {
     double P = 0, Fx = 0, Fy = 0, Fz = 0;
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.