Commits

Rio Yokota committed 6738dfa

IneJ wrapper for laplace.

Comments (0)

Files changed (6)

   const real_t THETA = 0.5;
   const real_t R = 2 / THETA;
 
-  for (B_iter B=jbodies.begin(); B!=jbodies.end(); ++B) {
+  for (B_iter B=jbodies.begin(); B!=jbodies.end(); B++) {
     B->X[0] = 2 * drand48();
     B->X[1] = 2 * drand48();
     B->X[2] = 2 * drand48();
   CI->M = 1;
   CI->L = 0;
 #if Cartesian
-  for( int i=1; i<MTERM; ++i ) CJ->M[i] /= CJ->M[0];
+  for (int i=1; i<MTERM; i++) CJ->M[i] /= CJ->M[0];
 #endif
   kernel.M2L(CI,CJ,false);
 
   Ci->M = 1;
   Ci->L = 0;
 #if Cartesian
-  for( int i=1; i<MTERM; ++i ) Cj->M[i] /= Cj->M[0];
+  for (int i=1; i<MTERM; i++) Cj->M[i] /= Cj->M[0];
 #endif
   kernel.M2L(Ci,Cj,false);
 #endif
 
-  for (B_iter B=bodies.begin(); B!=bodies.end(); ++B) {
+  for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     B->X[0] = R + 2 + 2 * drand48();
     B->X[1] = R + 2 + 2 * drand48();
     B->X[2] = R + 2 + 2 * drand48();
   Ci->NCBODY = bodies.size();
   kernel.L2P(Ci);
 
-  for (B_iter B=bodies2.begin(); B!=bodies2.end(); ++B) {
+  for (B_iter B=bodies2.begin(); B!=bodies2.end(); B++) {
     *B = bodies[B-bodies2.begin()];
     B->TRG = 0;
   }
   Ci->NDBODY = bodies2.size();
   Ci->BODY = bodies2.begin();
   kernel.P2P(Ci,Cj,false);
-  for (B_iter B=bodies2.begin(); B!=bodies2.end(); ++B) {
+  for (B_iter B=bodies2.begin(); B!=bodies2.end(); B++) {
     B->TRG /= B->SRC;
   }
 
   std::fstream file;
   file.open("kernel.dat", std::ios::out | std::ios::app);
   double diff = 0, norm = 0;
-  for (B_iter B=bodies.begin(),B2=bodies2.begin(); B!=bodies.end(); ++B,++B2) {
+  for (B_iter B=bodies.begin(),B2=bodies2.begin(); B!=bodies.end(); B++,B2++) {
     diff += (B->TRG[0] - B2->TRG[0]) * (B->TRG[0] - B2->TRG[0]);
     norm += B2->TRG[0] * B2->TRG[0];
   }

examples/parallel.cxx

 #pragma omp master
 #endif
   if (args.verbose) logger.printTitle("Profiling");
+  logger.startTimer("Total FMM");
   bodies.resize(args.numBodies);
   data.initBodies(bodies, args.distribution, LET.MPIRANK, LET.MPISIZE);
-  logger.startTimer("Total FMM");
   Bounds localBounds = boundbox.getBounds(bodies);
 #if IneJ
   jbodies.resize(args.numBodies);
   LET.commBodies();
   LET.commCells();
 #if IneJ
-  traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
+  traversal.dualTreeTraversal(cells, jcells, cycle);
 #else
   traversal.dualTreeTraversal(cells, cells, cycle, args.mutual);
   jbodies = bodies;
     }
     assert( ic == int(icells.size()) );
 #endif
-    traversal.dualTreeTraversal(cells, jcells, cycle, false);
+    traversal.dualTreeTraversal(cells, jcells, cycle);
   }
 #else
   jbodies = bodies;
       sync_tasks;                                               // Synchronize tasks
     }
 #if Cartesian
-    for( int i=1; i<MTERM; ++i ) C->M[i] /= C->M[0];            // Normalize multipole expansion coefficients
+    for (int i=1; i<MTERM; i++) C->M[i] /= C->M[0];             // Normalize multipole expansion coefficients
 #endif
     real_t x = 1.0 / THETA;                                     // Inverse of theta
 #if ERROR_OPT
   void setPoints(B_iter B0, B_iter BN) {
     vtkPoints * group = vtkPoints::New();
     group->SetNumberOfPoints(BN-B0);
-    for (B_iter B=B0; B!=BN; ++B) {
+    for (B_iter B=B0; B!=BN; B++) {
       group->SetPoint(B-B0, B->X[0], B->X[1], B->X[2]);
     }
     groups.push_back(group);
 #include "updownpass.h"
 #include "localessentialtree.h"
 
-extern "C" void FMM(int n, double* x, double* q, double *p, double* f, int periodicflag) {
+extern "C" void FMM(int ni, double * xi, double * pi, double * fi, int nj, double * xj, double *qj, int periodicflag) {
   Args args;
   Bodies bodies, jbodies;
   Cells cells, jcells;
   Logger logger;
   Sort sort;
 
-  args.numBodies = n;
+  args.numBodies = ni;
   args.THETA = 0.6;
   args.NCRIT = 16;
   args.NSPAWN = 1000;
   args.IMAGES = ((periodicflag & 0x1) == 0) ? 0 : 3;
-  args.mutual = 1;
+  args.mutual = 0;
   args.verbose = 1;
   args.distribution = "external";
 
 #pragma omp master
 #endif
   if (args.verbose) logger.printTitle("Profiling");
-  bodies.resize(n);
+  logger.startTimer("Total FMM");
+  bodies.resize(ni);
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     int i = B-bodies.begin();
-    B->X[0] = x[3*i+0];
-    B->X[1] = x[3*i+1];
-    B->X[2] = x[3*i+2];
-    B->SRC  = q[i];
-    B->TRG[0] =  p[i];
-    B->TRG[1] = -f[3*i+0];
-    B->TRG[2] = -f[3*i+1];
-    B->TRG[3] = -f[3*i+2];
+    B->X[0] = xi[3*i+0];
+    B->X[1] = xi[3*i+1];
+    B->X[2] = xi[3*i+2];
+    B->SRC  = 1;
+    B->TRG[0] = pi[i];
+    B->TRG[1] = fi[3*i+0];
+    B->TRG[2] = fi[3*i+1];
+    B->TRG[3] = fi[3*i+2];
     B->IBODY  = i;
   }
-  logger.startTimer("Total FMM");
-
   Bounds localBounds = boundbox.getBounds(bodies);
+  jbodies.resize(nj);
+  for (B_iter B=jbodies.begin(); B!=jbodies.end(); B++) {
+    int i = B-jbodies.begin();
+    B->X[0] = xj[3*i+0];
+    B->X[1] = xj[3*i+1];
+    B->X[2] = xj[3*i+2];
+    B->SRC  = qj[i];
+  }
+  localBounds = boundbox.getBounds(jbodies,localBounds);
   Bounds globalBounds = LET.allreduceBounds(localBounds);
   localBounds = LET.partition(bodies,globalBounds);
   bodies = sort.sortBodies(bodies);
   bodies = LET.commBodies(bodies);
+  LET.partition(jbodies,globalBounds);
+  jbodies = sort.sortBodies(jbodies);
+  jbodies = LET.commBodies(jbodies);
   Box box = boundbox.bounds2box(localBounds);
   logger.startPAPI();
   tree.buildTree(bodies, cells, box);
   pass.upwardPass(cells);
-  LET.setLET(cells,localBounds,cycle);
+  tree.buildTree(jbodies, jcells, box);
+  pass.upwardPass(jcells);
+  LET.setLET(jcells,localBounds,cycle);
   LET.commBodies();
   LET.commCells();
-  traversal.dualTreeTraversal(cells, cells, cycle);
+  traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
   for (int irank=1; irank<LET.MPISIZE; irank++) {
     LET.getLET(jcells,(LET.MPIRANK+irank)%LET.MPISIZE);
     traversal.dualTreeTraversal(cells, jcells, cycle);
 
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     int i = B-bodies.begin();
-    x[3*i+0] = B->X[0];
-    x[3*i+1] = B->X[1];
-    x[3*i+2] = B->X[2];
-    q[i]     = B->SRC;
-    p[i]     =  B->TRG[0];
-    f[3*i+0] = -B->TRG[1];
-    f[3*i+1] = -B->TRG[2];
-    f[3*i+2] = -B->TRG[3];
+    pi[i]     = B->TRG[0];
+    fi[3*i+0] = B->TRG[1];
+    fi[3*i+1] = B->TRG[2];
+    fi[3*i+2] = B->TRG[3];
   }
 }

wrappers/test_laplace.cxx

 #include <iomanip>
 #include <iostream>
 
-extern "C" void FMM(int n, double* x, double* q, double *p, double* f, int periodicflag);
+extern "C" void FMM(int ni, double * xi, double * pi, double * fi, int nj, double * xj, double * qj, int periodicflag);
 
-extern "C" void MPI_Shift(double *var, int n, int mpisize, int mpirank) {
+extern "C" void MPI_Shift(double * var, int n, int mpisize, int mpirank) {
   double *buf = new double [n];
   const int isend = (mpirank + 1          ) % mpisize;
   const int irecv = (mpirank - 1 + mpisize) % mpisize;
   MPI_Request sreq, rreq;
-
   MPI_Isend(var, n, MPI_DOUBLE, irecv, 1, MPI_COMM_WORLD, &sreq);
   MPI_Irecv(buf, n, MPI_DOUBLE, isend, 1, MPI_COMM_WORLD, &rreq);
   MPI_Wait(&sreq, MPI_STATUS_IGNORE);
   const double size = 2 * M_PI;
   const int stringLength = 20;
   double *xi = new double [3*N];
-  double *qi = new double [N];
   double *pi = new double [N];
   double *fi = new double [3*N];
   double *pd = new double [N];
     xi[3*i+0] = drand48() * size - M_PI;
     xi[3*i+1] = drand48() * size - M_PI;
     xi[3*i+2] = drand48() * size - M_PI;
-    qi[i] = 1. / N;
     pi[i] = 0;
     fi[3*i+0] = fi[3*i+1] = fi[3*i+2] = 0;
     pd[i] = 0;
     fd[3*i+0] = fd[3*i+1] = fd[3*i+2] = 0;
-    xj[3*i+0] = xi[3*i+0];
-    xj[3*i+1] = xi[3*i+1];
-    xj[3*i+2] = xi[3*i+2];
-    qj[i] = qi[i];
+    xj[3*i+0] = drand48() * size - M_PI;
+    xj[3*i+1] = drand48() * size - M_PI;
+    xj[3*i+2] = drand48() * size - M_PI;
+    qj[i] = 1. / N;
   }
 
-  FMM(N, xi, qi, pi, fi, 0);
-  for (int i=0; i<N; i++) {
-    xj[3*i+0] = xi[3*i+0];
-    xj[3*i+1] = xi[3*i+1];
-    xj[3*i+2] = xi[3*i+2];
-  }
+  FMM(N, xi, pi, fi, N, xj, qj, 0);
   if (mpirank == 0) std::cout << "--- MPI direct sum ---------------" << std::endl;
   for (int irank=0; irank<mpisize; irank++) {
     if (mpirank==0) std::cout << "Direct loop          : " << irank+1 << "/" << mpisize << std::endl;
         Fz += dz * invR3;
       }
       pd[i] += P;
-      fd[3*i+0] += Fx;
-      fd[3*i+1] += Fy;
-      fd[3*i+2] += Fz;
+      fd[3*i+0] -= Fx;
+      fd[3*i+1] -= Fy;
+      fd[3*i+2] -= Fz;
     }
   }
   double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
   }    
 
   delete[] xi;
-  delete[] qi;
   delete[] pi;
   delete[] fi;
   delete[] pd;