Commits

Rio Yokota committed c831595

Preparing for IneJ wrapper.

  • Participants
  • Parent commits 55f1af2

Comments (0)

Files changed (8)

examples/parallel.cxx

   LocalEssentialTree LET(args.IMAGES);
   logger.verbose = LET.MPIRANK == 0;
   args.verbose &= logger.verbose;
+  args.numBodies /= LET.MPISIZE;
   if (args.verbose) {
     boundbox.verbose = true;
     tree.verbose = true;
 #pragma omp parallel
 #pragma omp master
 #endif
-  int numBodies = args.numBodies / LET.MPISIZE;
   if (args.verbose) logger.printTitle("Profiling");
-  bodies.resize(numBodies);
+  bodies.resize(args.numBodies);
   data.initBodies(bodies, args.distribution, LET.MPIRANK, LET.MPISIZE);
   logger.startTimer("Total FMM");
   Bounds localBounds = boundbox.getBounds(bodies);
 #if IneJ
-  jbodies.resize(numBodies);
+  jbodies.resize(args.numBodies);
   data.initBodies(jbodies, args.distribution, LET.MPIRANK+LET.MPISIZE, LET.MPISIZE);
   localBounds = boundbox.getBounds(jbodies,localBounds);
 #endif

include/args.h

   void usage(char * name) {
     fprintf(stderr,
             "Usage: %s [options]\n"
-            "Options:\n"
+            "Option : Description (Default value):\n"
             " --numBodies : Number of bodies (%d)\n"
             " --numTarget : Number of targets for error checking (%d)\n"
             " --ncrit : Number of bodies per leaf cell (%d)\n"
-            " --nspawn : Threshold for splitting both cells during recursion (%d)\n"
+            " --nspawn : Threshold for stopping thread spawning during recursion (%d)\n"
             " --images : Number of periodic image levels (%d)\n"
             " --theta : Multipole acceptance criterion (%f)\n"
             " --mutual [0/1] : Use mutual interaction (%d)\n"
   }
 
  public:
-  Args(int argc, char ** argv) : numBodies(1000000), numTarget(100), NCRIT(16), NSPAWN(1000), IMAGES(0),
+  Args(int argc=0, char ** argv=NULL) : numBodies(1000000), numTarget(100), NCRIT(16), NSPAWN(1000), IMAGES(0),
     THETA(.6), mutual(1), verbose(1), distribution("cube") {
     while (1) {
       int option_index;

include/dataset.h

 
 //! Read target values from file
   void readTarget(Bodies &bodies, int mpirank) {
-    char fname[256];                                            // File name for saving direct calculation values
-    sprintf(fname,"direct%4.4d",mpirank);                       // Set file name
-    std::ifstream file(fname,std::ios::in | std::ios::binary);  // Open file
+    std::stringstream name;                                     // File name
+    name << "direct" << std::setfill('0') << std::setw(4)       // Set format
+         << mpirank << ".dat";                                  // Create file name for saving direct calculation values
+    std::ifstream file(name.str(),std::ios::in | std::ios::binary);// Open file
     file.seekg(filePosition);                                   // Set position in file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       file >> B->TRG[0];                                        //  Read data for potential
 
 //! Write target values to file
   void writeTarget(Bodies &bodies, int mpirank) {
-    char fname[256];                                            // File name for saving direct calculation values
-    sprintf(fname,"direct%4.4d",mpirank);                       // Set file name
-    std::ofstream file(fname,std::ios::out | std::ios::app | std::ios::binary);// Open file
+    std::stringstream name;                                     // File name
+    name << "direct" << std::setfill('0') << std::setw(4)       // Set format
+         << mpirank << ".dat";                                  // Create file name for saving direct calculation values
+    std::ofstream file(name.str(),std::ios::out | std::ios::app | std::ios::binary);// Open file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       file << B->TRG[0] << std::endl;                           //  Write data for potential
       file << B->TRG[1] << std::endl;                           //  Write data for x acceleration

include/localessentialtree.h

 
 //! Get local essential tree from rank "irank".
   void getLET(Cells &cells, int irank) {
-    startTimer("Get LET");                                      // Start timer
+    std::stringstream event;                                    // Event name
+    event << "Get LET from rank " << irank;                     // Create event name based on irank
+    startTimer(event.str());                                    // Start timer
     for (int i=recvCellCount[irank]-1; i>=0; i--) {             // Loop over receive cells
       C_iter C = recvCells.begin() + recvCellDispl[irank] + i;  //  Iterator of receive cell
       if (C->NCBODY != 0) {                                     //  If cell has bodies
     }                                                           // End loop over receive cells
     cells.resize(recvCellCount[irank]);                         // Resize cell vector for LET
     cells.assign(recvCells.begin()+recvCellDispl[irank],recvCells.begin()+recvCellDispl[irank]+recvCellCount[irank]);
-    stopTimer("Get LET",verbose);                               // Stop timer
+    stopTimer(event.str(),verbose);                             // Stop timer
   }
 
 //! Send bodies

include/logger.h

 #include <pthread.h>
 #include <queue>
 #include <string>
+#include <sstream>
 #include <sys/time.h>
 #include <vector>
 
 
 //! Write traces of all events
   inline void writeTrace(int mpirank=0) {
-    char fname[256];                                            // File name
-    sprintf(fname,"trace%4.4d.svg",mpirank);                    // Create file name for trace
-    std::ofstream traceFile(fname);                             // Open trace log file
+    startTimer("Write trace");                                  // Start timer
+    std::stringstream name;                                     // File name
+    name << "trace" << std::setfill('0') << std::setw(4)        // Set format
+         << mpirank << ".svg";                                  // Create file name for trace
+    std::ofstream traceFile(name.str());                        // Open trace log file
     traceFile << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" // Header statements for trace log file
       << "<!DOCTYPE svg PUBLIC \"-_W3C_DTD SVG 1.0_EN\" \"http://www.w3.org/TR/SVG/DTD/svg10.dtd\">\n"
       << "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"
     }                                                           // End while loop for queue of traces
     traceFile << "  </g>\n" "</svg>\n";                         // Footer for trace log file
     traceFile.close();                                          // Close trace log file
+    stopTimer("Write trace",verbose);                           // Stop timer
   }
 #else
   inline void startTracer(Trace) {}

wrappers/Makefile

 laplace: test_laplace.o ../kernels/Laplace$(BASIS)$(DEVICE).o ../kernels/LaplaceP2P$(DEVICE).o
 	make liblaplace.a
 	$(CXX) $? -L. -llaplace $(LFLAGS)
-	mpirun -np 4 ./a.out
+	mpirun -np 8 ./a.out
 
 liblaplace.a: laplace.o
 	ar ruv liblaplace.a $?
 matern: test_matern.o ../kernels/Matern$(BASIS)$(DEVICE).o ../kernels/MaternP2P$(DEVICE).o
 	make libmatern.a
 	$(CXX) $? -L. -lmatern $(LFLAGS)
-	mpirun -np 4 ./a.out
+	mpirun -np 8 ./a.out
 
 libmatern.a: matern.o
 	ar ruv libmatern.a $?

wrappers/laplace.cxx

+#include "args.h"
 #include "boundbox.h"
 #include "buildtree.h"
 #include "sort.h"
 #include "localessentialtree.h"
 
 extern "C" void FMM(int n, double* x, double* q, double *p, double* f, int periodicflag) {
+  Args args;
   Bodies bodies, jbodies;
   Cells cells, jcells;
+  Logger logger;
   Sort sort;
 
-  const int NCRIT = 16;
-  const int NSPAWN = 1000;
-  const int IMAGES = ((periodicflag & 0x1) == 0) ? 0 : 3;
-  const real_t THETA = 0.6;
+  args.numBodies = n;
+  args.THETA = 0.6;
+  args.NCRIT = 16;
+  args.NSPAWN = 1000;
+  args.IMAGES = ((periodicflag & 0x1) == 0) ? 0 : 3;
+  args.mutual = 1;
+  args.verbose = 1;
+  args.distribution = "external";
+
   const real_t cycle = 2 * M_PI;
-
-  BoundBox boundbox(NSPAWN);
-  BuildTree tree(NCRIT,NSPAWN);
-  UpDownPass pass(THETA);
-  Traversal traversal(NSPAWN,IMAGES);
-  LocalEssentialTree LET(IMAGES);
-  boundbox.verbose = LET.MPIRANK == 0;
-  tree.verbose = LET.MPIRANK == 0;
-  pass.verbose = LET.MPIRANK == 0;
-  traversal.verbose = LET.MPIRANK == 0;
-  LET.verbose = LET.MPIRANK == 0;
-
+  BoundBox boundbox(args.NSPAWN);
+  BuildTree tree(args.NCRIT,args.NSPAWN);
+  UpDownPass pass(args.THETA);
+  Traversal traversal(args.NSPAWN,args.IMAGES);
+  LocalEssentialTree LET(args.IMAGES);
+  logger.verbose = LET.MPIRANK == 0;
+  args.verbose &= logger.verbose;
+  if (args.verbose) {
+    boundbox.verbose = true;
+    tree.verbose = true;
+    pass.verbose = true;
+    traversal.verbose = true;
+    LET.verbose = true;
+    logger.printTitle("Parameters");
+  }
+  if(LET.MPIRANK == 0) args.print(logger.stringLength,P);
+#if AUTO
+  traversal.timeKernels();
+#endif
+#if _OPENMP
+#pragma omp parallel
+#pragma omp master
+#endif
+  if (args.verbose) logger.printTitle("Profiling");
   bodies.resize(n);
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     int i = B-bodies.begin();
     B->TRG[3] = -f[3*i+2];
     B->IBODY  = i;
   }
+  logger.startTimer("Total FMM");
 
   Bounds localBounds = boundbox.getBounds(bodies);
   Bounds globalBounds = LET.allreduceBounds(localBounds);
   bodies = sort.sortBodies(bodies);
   bodies = LET.commBodies(bodies);
   Box box = boundbox.bounds2box(localBounds);
+  logger.startPAPI();
   tree.buildTree(bodies, cells, box);
   pass.upwardPass(cells);
   LET.setLET(cells,localBounds,cycle);
     LET.getLET(jcells,(LET.MPIRANK+irank)%LET.MPISIZE);
     traversal.dualTreeTraversal(cells, jcells, cycle);
   }
+  pass.downwardPass(cells);
 
-  pass.downwardPass(cells);
   LET.unpartition(bodies);
   bodies = sort.sortBodies(bodies);
   bodies = LET.commBodies(bodies);
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     B->ICELL = B->IBODY;
   }
-  Bodies buffer = bodies;
-  bodies = sort.sortBodies(buffer);
+  bodies = sort.sortBodies(bodies);
+  logger.stopPAPI();
+  logger.stopTimer("Total FMM");
+  if (logger.verbose) {
+    logger.printTitle("Total runtime");
+    logger.printTime("Total FMM");
+  }
 
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     int i = B-bodies.begin();

wrappers/test_laplace.cxx

   int mpisize, mpirank;
   MPI_Comm_size(MPI_COMM_WORLD, &mpisize);
   MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
-  const int N = 1000000;
+  const int N = 1000000 / mpisize;
   const double size = 2 * M_PI;
+  const int stringLength = 20;
   double *xi = new double [3*N];
   double *qi = new double [N];
   double *pi = new double [N];
     xj[3*i+1] = xi[3*i+1];
     xj[3*i+2] = xi[3*i+2];
   }
+  if (mpirank == 0) std::cout << "--- MPI direct sum ---------------" << std::endl;
   for (int irank=0; irank<mpisize; irank++) {
     if (mpirank==0) std::cout << "Direct loop          : " << irank+1 << "/" << mpisize << std::endl;
     MPI_Shift(xj, 3*N, mpisize, mpirank);
       fd[3*i+2] += Fz;
     }
   }
-  double Pd = 0, Pn = 0, Fd = 0, Fn = 0;
+  double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
   for (int i=0; i<100; i++) {
-    Pd += (pi[i] - pd[i]) * (pi[i] - pd[i]);
-    Pn += pd[i] * pd[i];
-    Fd += (fi[3*i+0] - fd[3*i+0]) * (fi[3*i+0] - fd[3*i+0])
-        + (fi[3*i+1] - fd[3*i+1]) * (fi[3*i+1] - fd[3*i+1])
-        + (fi[3*i+2] - fd[3*i+2]) * (fi[3*i+2] - fd[3*i+2]);
-    Fn += fd[3*i+0] * fd[3*i+0] + fd[3*i+1] * fd[3*i+1] + fd[3*i+2] * fd[3*i+2];
+    diff1 += (pi[i] - pd[i]) * (pi[i] - pd[i]);
+    norm1 += pd[i] * pd[i];
+    diff2 += (fi[3*i+0] - fd[3*i+0]) * (fi[3*i+0] - fd[3*i+0])
+          + (fi[3*i+1] - fd[3*i+1]) * (fi[3*i+1] - fd[3*i+1])
+          + (fi[3*i+2] - fd[3*i+2]) * (fi[3*i+2] - fd[3*i+2]);
+    norm2 += fd[3*i+0] * fd[3*i+0] + fd[3*i+1] * fd[3*i+1] + fd[3*i+2] * fd[3*i+2];
   }
-  std::cout << std::fixed << std::setprecision(7);
-  std::cout << "Potential @ rank " << mpirank << "   : " << sqrtf(Pd/Pn) << std::endl;
-  std::cout << "Force     @ rank " << mpirank << "   : " << sqrtf(Fd/Fn) << std::endl;
+  MPI_Reduce(&diff1, &diff3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&norm1, &norm3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&diff2, &diff4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&norm2, &norm4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  if (mpirank == 0) {
+    std::cout << "--- FMM vs. direct ---------------" << std::endl;
+    std::cout << std::setw(stringLength) << std::left
+	      << "Rel. L2 Error (pot)" << " : " << std::sqrt(diff3/norm3) << std::endl;
+    if( std::abs(diff3) > 0 ) {
+      std::cout << std::setw(stringLength) << std::left
+	        << "Rel. L2 Error (acc)" << " : " << std::sqrt(diff4/norm4) << std::endl;
+    }
+  }    
 
   delete[] xi;
   delete[] qi;