Commits

Rio Yokota committed c3cf12c

Fixed GPUSphericalLaplace.cu for M2M when r = 0 during periodic summation.

  • Participants
  • Parent commits 008716e

Comments (0)

Files changed (8)

File Makefile.include

 .PHONY: docs
 
 ### CUDA path
-CUDA_INSTALL_PATH = /usr/local/cuda
+CUDA_INSTALL_PATH = /usr/local/cuda4.1/cuda
 
 ### VTK path
 VTK_INCLUDE_PATH = /usr/include/vtk-5.8
 VTK_LIBRARY_PATH = /usr/lib/vtk-5.8
 
 ### choose CPU or GPU
-DEVICE  = CPU
-#DEVICE  = GPU
+#DEVICE  = CPU
+DEVICE  = GPU
 
 ### choose Cartesian or spherical expansion
 #EXPAND  = Cartesian

File kernel/CPUSphericalLaplace.cxx

 namespace{
 //! Get r,theta,phi from x,y,z
 void cart2sph(real& r, real& theta, real& phi, vect dist) {
-  r = sqrt(norm(dist))+EPS;                                   // r = sqrt(x^2 + y^2 + z^2) + eps
-  theta = acos(dist[2] / r);                                  // theta = acos(z / r)
+  r = sqrt(norm(dist)) * (1 + EPS);                           // r = sqrt(x^2 + y^2 + z^2)
+  if( r < EPS ) {                                             // If r == 0
+    theta = 0;                                                //  theta can be anything so we set it to 0
+  } else {                                                    // If r != 0
+    theta = acos(dist[2] / r);                                //  theta = acos(z / r)
+  }                                                           // End if for r == 0
   if( fabs(dist[0]) + fabs(dist[1]) < EPS ) {                 // If |x| < eps & |y| < eps
     phi = 0;                                                  //  phi can be anything so we set it to 0
   } else if( fabs(dist[0]) < EPS ) {                          // If |x| < eps

File kernel/GPUSphericalLaplace.cu

 namespace {                                                     // Prevent overlap of definitions among equations
 __device__ void cart2sph(gpureal& r, gpureal& theta, gpureal& phi,// Get r,theta,phi from x,y,z on GPU
                          gpureal dx, gpureal dy, gpureal dz) {
-  r = sqrtf(dx * dx + dy * dy + dz * dz)+EPS;                   // r = sqrt(x^2 + y^2 + z^2) + eps
-  theta = acosf(dz / r);                                        // theta = acos(z / r)
+  r = sqrtf(dx * dx + dy * dy + dz * dz) * (1 + EPS);           // r = sqrt(x^2 + y^2 + z^2)
+  if( r < EPS ) {                                               // If r == 0
+    theta = 0;                                                  //  theta can be anything so we set it to 0
+  } else {                                                      // If r != 0
+    theta = acosf(dz / r);                                      //  theta = acos(z / r)
+  }                                                             // End if for r == 0
   if( fabs(dx) + fabs(dy) < EPS ) {                             // If |x| < eps & |y| < eps
     phi = 0;                                                    //  phi can be anything so we set it to 0
   } else if( fabs(dx) < EPS ) {                                 // If |x| < eps
     }                                                           //  End loop up to n
     if( n <= nn ) Ynm = rhon * p * factShrd[n-m];               //  rho^(-n-1) * Ynm
     YnmShrd[l] = Ynm;                                           //  Put Ynm in shared memory
-  }                                                             // End loop over coefficients in Ynm
+  }                                                             // Enttd loop over coefficients in Ynm
   __syncthreads();                                              // Syncronize threads
 }
 }                                                               // End anonymous namespace
     cart2sph(rho,alpha,beta,d.x,d.y,d.z);
     evalMultipole(YnmShrd,rho,alpha,factShrd);
     LaplaceM2M_core(target,beta,factShrd,YnmShrd,sourceShrd);
+    if(d.x*d.x+d.y*d.y+d.z*d.z<EPS&&threadIdx.x==0) printf("#FMM output: %f\n",target[0]);
   }
   itarget = blockIdx.x * THREADS + threadIdx.x;
   targetGlob[2*itarget+0] = target[0];

File mr3/Makefile

 
 .SUFFIXES: .cu .c .o
 
-#C++ = g++
-#CC = gcc
-#CC = icc -xW -tpp7
-#F90 = gfortran
-#F77 = gfortran
-F90 = ifort
-F77 = ifort
-
-CC = icc -openmp -xW -tpp7
-C++ = icpc -openmp -xW -tpp7
+CC = gcc
+C++ = g++
+#CC = icc -openmp -xW -tpp7
+#C++ = icpc -openmp -xW -tpp7
+F90 = gfortran
+F77 = gfortran
 #F90 = ifort -xW -tpp7
 #F77 = $(F90)
 

File mr3/emutestvg

Binary file removed.

File mr3/sample_md3vg

Binary file removed.

File wrapper/test_parallel_coulombVdW.cxx

     }
 #endif
   }
+  double fc[3];
+  for( int d=0; d<3; ++d ) fc[d]=0;
+  for( int i=0; i<N; ++i ) {
+    for( int d=0; d<3; ++d ) {
+      fc[d] += qi[i] * xi[3*i+d];
+    }
+  }
+  for( int i=0; i<N; ++i ) {
+    pd[3*i+0] += 2.0 * M_PI / (3.0 * size * size * size)
+              * (fc[0] * fc[0] + fc[1] * fc[1] + fc[2] * fc[2]) / N;
+  }
+  for( int i=0; i<N; ++i ) {
+    for( int d=0; d<3; ++d ) {
+      fd[3*i+d] -= 4.0 * M_PI * qi[i] * fc[d] / (3.0 * size * size * size);
+    }
+  }
   double Pd = 0, Pn = 0, Fd = 0, Fn = 0;
   for( int i=0; i<N; i++ ) {
     pd[3*i+0] *= 0.5;

File wrapper/test_serial_coulombVdW.cxx

 #else
   const int N = 10000;
   const int nat = 16;
-  const double size = 20;
+  const double size = 2;
 #endif
   double *xi     = new double [3*N];
   double *qi     = new double [N];
     natex[i] = 0;
   }
 
-/*
   FMMcalccoulomb_ij(N, xi, qi, pi, N, xj, qj, 0.0, 1, size, 0);
   FMMcalccoulomb_ij(N, xi, qi, fi, N, xj, qj, 0.0, 0, size, 0);
 #if 1
     fd[3*i+2] += qi[i] * Fz;
   }
 #endif
+  double fc[3];
+  for( int d=0; d<3; ++d ) fc[d]=0;
+  for( int i=0; i<N; ++i ) {
+    for( int d=0; d<3; ++d ) {
+      fc[d] += qi[i] * xi[3*i+d];
+    }
+  }
+  for( int i=0; i<N; ++i ) {
+    pd[3*i+0] += 2.0 * M_PI / (3.0 * size * size * size)
+              * (fc[0] * fc[0] + fc[1] * fc[1] + fc[2] * fc[2]) / N;
+  }
+  for( int i=0; i<N; ++i ) {
+    for( int d=0; d<3; ++d ) {
+      fd[3*i+d] -= 4.0 * M_PI * qi[i] * fc[d] / (3.0 * size * size * size);
+    }
+  }
   double Pd = 0, Pn = 0, Fd = 0, Fn = 0;
   for( int i=0; i<N; i++ ) {
     pd[3*i+0] *= 0.5;
   }
   std::cout << "Coulomb       potential : " << sqrtf(Pd/Pn) << std::endl;
   std::cout << "Coulomb       force     : " << sqrtf(Fd/Fn) << std::endl;
-*/
-
 
   FMMcalcvdw_ij(N,xi,atypei,pi,N,xj,atypej,nat,gscale,rscale,3,size,0);
   FMMcalcvdw_ij(N,xi,atypei,fi,N,xj,atypej,nat,fgscale,frscale,2,size,0);
     fd[3*i+2] += Fz;
   }
 #endif
-  double Pd = 0, Pn = 0, Fd = 0, Fn = 0;
   Pd = Pn = Fd = Fn = 0;
   for( int i=0; i<N; i++ ) {
 #if 1