Commits

Rio Yokota committed 55f1af2

Added IneJ to serial.cxx and parallel.cxx. printNow -> verbose & modified args.h.

  • Participants
  • Parent commits d5d490d

Comments (0)

Files changed (15)

 cd examples
 make wrapper
 
+To get the list of command line options:
+./a.out --help
 
 2. Where are the important files?
 Most of the source code is in the directory "include" in the form of header files.

File examples/Makefile

 include ../Makefile.include
 
-#LFLAGS	+= -DMANY
+#LFLAGS	+= -DIneJ # Different target and source points
 
 serial: serial.o $(OBJECTS)
 	$(CXX) $? $(LFLAGS)
-	./a.out --numBodies 10000
-#	numactl --interleave=all ./a.out
+	./a.out
 
 parallel: parallel.o $(OBJECTS)
 	$(CXX) $? $(LFLAGS)
 	mpirun -np 8 ./a.out
 
+scalability: serial.o $(OBJECTS)
+	$(CXX) $? $(LFLAGS)
+	for N in 10000 20000 50000 100000 200000 500000 1000000; do \
+        ./a.out --numBodies $$N --verbose 0; echo; \
+	done
+
 kernel: kernel.o $(OBJECTS)
 	$(CXX) $? $(LFLAGS)
 	./a.out

File examples/kernel.cxx

 #include <vector>
 #if VTK
 #include "vtk.h"
+#else
+#error Turn on VTK for this test
 #endif
 #if COMkernel
 #error Turn off COMkernel for this test
   file.open("kernel.dat", std::ios::out | std::ios::app);
   double diff = 0, norm = 0;
   for (B_iter B=bodies.begin(),B2=bodies2.begin(); B!=bodies.end(); ++B,++B2) {
-    std::cout << B->TRG[0] << " " << B2->TRG[0] << std::endl;
     diff += (B->TRG[0] - B2->TRG[0]) * (B->TRG[0] - B2->TRG[0]);
     norm += B2->TRG[0] * B2->TRG[0];
   }
     int p;
     double e;
     std::stringstream stream(line);
-    stream >> p >>  e;
+    stream >> p >> e;
+#if Cartesian
+    p++;
+#endif
     order.push_back(p);
     error.push_back(log10(e));
     bound.push_back(log10(std::pow(THETA,p)));

File examples/parallel.cxx

   UpDownPass pass(args.THETA);
   Traversal traversal(args.NSPAWN,args.IMAGES);
   LocalEssentialTree LET(args.IMAGES);
-  logger.printNow = LET.MPIRANK == 0;
-  boundbox.printNow = LET.MPIRANK == 0;
-  tree.printNow = LET.MPIRANK == 0;
-  pass.printNow = LET.MPIRANK == 0;
-  traversal.printNow = LET.MPIRANK == 0;
-  LET.printNow = LET.MPIRANK == 0;
+  logger.verbose = LET.MPIRANK == 0;
+  args.verbose &= logger.verbose;
+  if (args.verbose) {
+    boundbox.verbose = true;
+    tree.verbose = true;
+    pass.verbose = true;
+    traversal.verbose = true;
+    LET.verbose = true;
+    logger.printTitle("Parameters");
+  }
+  if(LET.MPIRANK == 0) args.print(logger.stringLength,P);
 #if AUTO
   traversal.timeKernels();
 #endif
 #pragma omp parallel
 #pragma omp master
 #endif
-#ifdef MANY
-  for (int it=0; it<25; it++) {
-    int numBodies = int(std::pow(10,(it+24)/8.0)) / LET.MPISIZE;
-#else
-  {
-    int numBodies = args.numBodies / LET.MPISIZE;
+  int numBodies = args.numBodies / LET.MPISIZE;
+  if (args.verbose) logger.printTitle("Profiling");
+  bodies.resize(numBodies);
+  data.initBodies(bodies, args.distribution, LET.MPIRANK, LET.MPISIZE);
+  logger.startTimer("Total FMM");
+  Bounds localBounds = boundbox.getBounds(bodies);
+#if IneJ
+  jbodies.resize(numBodies);
+  data.initBodies(jbodies, args.distribution, LET.MPIRANK+LET.MPISIZE, LET.MPISIZE);
+  localBounds = boundbox.getBounds(jbodies,localBounds);
 #endif
-    if (LET.printNow) std::cout << std::endl
-      << "Num bodies           : " << numBodies << std::endl;
-    if (LET.printNow) logger.printTitle("Profiling");
-    bodies.resize(numBodies);
-    data.initBodies(bodies, args.distribution, LET.MPIRANK, LET.MPISIZE);
-    logger.startTimer("Total FMM");
-    Bounds localBounds = boundbox.getBounds(bodies);
-    Bounds globalBounds = LET.allreduceBounds(localBounds);
-    localBounds = LET.partition(bodies,globalBounds);
-    bodies = sort.sortBodies(bodies);
-    bodies = LET.commBodies(bodies);
-    Box box = boundbox.bounds2box(localBounds);
-    tree.buildTree(bodies, cells, box);
-    pass.upwardPass(cells);
-    logger.startPAPI();
+  Bounds globalBounds = LET.allreduceBounds(localBounds);
+  localBounds = LET.partition(bodies,globalBounds);
+  bodies = sort.sortBodies(bodies);
+  bodies = LET.commBodies(bodies);
+#if IneJ
+  LET.partition(jbodies,globalBounds);
+  jbodies = sort.sortBodies(jbodies);
+  jbodies = LET.commBodies(jbodies);
+#endif
+  Box box = boundbox.bounds2box(localBounds);
+  logger.startPAPI();
+  tree.buildTree(bodies, cells, box);
+  pass.upwardPass(cells);
+#if IneJ
+  tree.buildTree(jbodies, jcells, box);
+  pass.upwardPass(jcells);
+#endif
 
 #if 1 // Set to 0 for debugging by shifting bodies and reconstructing tree : Step 1
-    LET.setLET(cells,localBounds,cycle);
-    LET.commBodies();
-    LET.commCells();
-    traversal.dualTreeTraversal(cells, cells, cycle, args.mutual);
-    jbodies = bodies;
-    for (int irank=1; irank<LET.MPISIZE; irank++) {
-      LET.getLET(jcells,(LET.MPIRANK+irank)%LET.MPISIZE);
+#if IneJ
+  LET.setLET(jcells,localBounds,cycle);
+#else
+  LET.setLET(cells,localBounds,cycle);
+#endif
+  LET.commBodies();
+  LET.commCells();
+#if IneJ
+  traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
+#else
+  traversal.dualTreeTraversal(cells, cells, cycle, args.mutual);
+  jbodies = bodies;
+#endif
+  for (int irank=1; irank<LET.MPISIZE; irank++) {
+    LET.getLET(jcells,(LET.MPIRANK+irank)%LET.MPISIZE);
 
 #if 0 // Set to 1 for debugging full LET communication : Step 2 (LET must be set to full tree)
-      LET.shiftBodies(jbodies); // This will overwrite recvBodies. (define recvBodies2 in partition.h to avoid this)
-      Cells icells;
-      tree.buildTree(jbodies, icells);
-      pass.upwardPass(icells);
-      assert( icells.size() == jcells.size() );
-      CellQueue Qi, Qj;
-      Qi.push(icells.begin());
-      Qj.push(jcells.begin());
-      int ic=0;
-      while (!Qi.empty()) {
-        C_iter Ci=Qi.front(); Qi.pop();
-        C_iter Cj=Qj.front(); Qj.pop();
-        if (Ci->ICELL != Cj->ICELL) {
-          std::cout << LET.MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
-          break;
-        }
-        if (Ci->NCHILD != Cj->NCHILD) {
-          std::cout << LET.MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
-          break;
-        }
-        if (Ci->NCBODY != Cj->NCBODY) {
-          std::cout << LET.MPIRANK << " NCBODY : " << Ci->NCBODY << " " << Cj->NCBODY << std::endl;
-          break;
-        }
-        real_t sumi = 0, sumj = 0;
-        if (Ci->NCBODY != 0) {
-          for (int i=0; i<Ci->NCBODY; i++) {
-            B_iter Bi = Ci->BODY+i;
-            B_iter Bj = Cj->BODY+i;
-            sumi += Bi->X[0];
-            sumj += Bj->X[0];
-          }
-        }
-        if (fabs(sumi-sumj)/fabs(sumi) > 1e-6) std::cout << LET.MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
-        assert( fabs(sumi-sumj)/fabs(sumi) < 1e-6 );
-        for (int i=0; i<Ci->NCHILD; i++) Qi.push(icells.begin()+Ci->CHILD+i);
-        for (int i=0; i<Cj->NCHILD; i++) Qj.push(jcells.begin()+Cj->CHILD+i);
-        ic++;
+    LET.shiftBodies(jbodies); // This will overwrite recvBodies. (define recvBodies2 in partition.h to avoid this)
+    Cells icells;
+    tree.buildTree(jbodies, icells);
+    pass.upwardPass(icells);
+    assert( icells.size() == jcells.size() );
+    CellQueue Qi, Qj;
+    Qi.push(icells.begin());
+    Qj.push(jcells.begin());
+    int ic=0;
+    while (!Qi.empty()) {
+      C_iter Ci=Qi.front(); Qi.pop();
+      C_iter Cj=Qj.front(); Qj.pop();
+      if (Ci->ICELL != Cj->ICELL) {
+	std::cout << LET.MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
+	break;
       }
-      assert( ic == int(icells.size()) );
+      if (Ci->NCHILD != Cj->NCHILD) {
+	std::cout << LET.MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
+	break;
+      }
+      if (Ci->NCBODY != Cj->NCBODY) {
+	std::cout << LET.MPIRANK << " NCBODY : " << Ci->NCBODY << " " << Cj->NCBODY << std::endl;
+	break;
+      }
+      real_t sumi = 0, sumj = 0;
+      if (Ci->NCBODY != 0) {
+	for (int i=0; i<Ci->NCBODY; i++) {
+	  B_iter Bi = Ci->BODY+i;
+	  B_iter Bj = Cj->BODY+i;
+	  sumi += Bi->X[0];
+	  sumj += Bj->X[0];
+	}
+      }
+      if (fabs(sumi-sumj)/fabs(sumi) > 1e-6) std::cout << LET.MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
+      assert( fabs(sumi-sumj)/fabs(sumi) < 1e-6 );
+      for (int i=0; i<Ci->NCHILD; i++) Qi.push(icells.begin()+Ci->CHILD+i);
+      for (int i=0; i<Cj->NCHILD; i++) Qj.push(jcells.begin()+Cj->CHILD+i);
+      ic++;
+    }
+    assert( ic == int(icells.size()) );
 #endif
-      traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
-    }
+    traversal.dualTreeTraversal(cells, jcells, cycle, false);
+  }
 #else
-    jbodies = bodies;
-    for (int irank=0; irank<LET.MPISIZE; irank++) {
-      LET.shiftBodies(jbodies);
-      jcells.clear();
-      localBounds = boundbox.getBounds(jbodies);
-      box = boundbox.bounds2box(localBounds);
-      tree.buildTree(jbodies, jcells, box);
-      pass.upwardPass(jcells);
-      traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
-    }
+  jbodies = bodies;
+  for (int irank=0; irank<LET.MPISIZE; irank++) {
+    LET.shiftBodies(jbodies);
+    jcells.clear();
+    localBounds = boundbox.getBounds(jbodies);
+    box = boundbox.bounds2box(localBounds);
+    tree.buildTree(jbodies, jcells, box);
+    pass.upwardPass(jcells);
+    traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
+  }
 #endif
-    pass.downwardPass(cells);
+  pass.downwardPass(cells);
 
 #if 0
-    LET.unpartition(bodies);
-    bodies = sort.sortBodies(bodies);
-    bodies = LET.commBodies(bodies);
-    for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
-      B->ICELL = B->IBODY;
-    }
-    bodies = sort.sortBodies(bodies);
+  LET.unpartition(bodies);
+  bodies = sort.sortBodies(bodies);
+  bodies = LET.commBodies(bodies);
+  for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
+    B->ICELL = B->IBODY;
+  }
+  bodies = sort.sortBodies(bodies);
 #endif
-    logger.stopPAPI();
-    logger.stopTimer("Total FMM");
-    if (logger.printNow) logger.printTitle("MPI direct sum");
-    boundbox.writeTime();
-    tree.writeTime();
-    pass.writeTime();
-    traversal.writeTime();
-    LET.writeTime();
-    boundbox.resetTimer();
-    tree.resetTimer();
-    pass.resetTimer();
-    traversal.resetTimer();
-    LET.resetTimer();
-    logger.resetTimer();
-    jbodies = bodies;
-    if (int(bodies.size()) > args.numTarget) data.sampleBodies(bodies, args.numTarget);
-    Bodies bodies2 = bodies;
-    data.initTarget(bodies2);
-    logger.startTimer("Total Direct");
-    for (int i=0; i<LET.MPISIZE; i++) {
-      LET.shiftBodies(jbodies);
-      traversal.direct(bodies2, jbodies, cycle);
-      if (logger.printNow) std::cout << "Direct loop          : " << i+1 << "/" << LET.MPISIZE << std::endl;
-    }
-    traversal.normalize(bodies2);
-    if (logger.printNow) logger.printTitle("Total runtime");
-    if (logger.printNow) logger.printTime("Total FMM");
-    logger.stopTimer("Total Direct",logger.printNow);
-    double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
-    data.evalError(bodies, bodies2, diff1, norm1, diff2, norm2);
-    MPI_Reduce(&diff1, &diff3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-    MPI_Reduce(&norm1, &norm3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-    MPI_Reduce(&diff2, &diff4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-    MPI_Reduce(&norm2, &norm4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-    if(logger.printNow) {
-      logger.printError(diff3, norm3, diff4, norm4);
-      tree.printTreeData(cells);
-      traversal.printTraversalData();
-      logger.printPAPI();
-    }
+  logger.stopPAPI();
+  logger.stopTimer("Total FMM");
+  if (args.verbose) logger.printTitle("MPI direct sum");
+  if (int(bodies.size()) > args.numTarget) data.sampleBodies(bodies, args.numTarget);
+  Bodies bodies2 = bodies;
+  data.initTarget(bodies2);
+  logger.startTimer("Total Direct");
+  for (int i=0; i<LET.MPISIZE; i++) {
+    LET.shiftBodies(jbodies);
+    traversal.direct(bodies2, jbodies, cycle);
+    if (args.verbose) std::cout << "Direct loop          : " << i+1 << "/" << LET.MPISIZE << std::endl;
+  }
+  traversal.normalize(bodies2);
+  if (args.verbose) logger.printTitle("Total runtime");
+  if (logger.verbose) logger.printTime("Total FMM");
+  logger.stopTimer("Total Direct",logger.verbose);
+  boundbox.writeTime();
+  tree.writeTime();
+  pass.writeTime();
+  traversal.writeTime();
+  LET.writeTime();
+  boundbox.resetTimer();
+  tree.resetTimer();
+  pass.resetTimer();
+  traversal.resetTimer();
+  LET.resetTimer();
+  logger.resetTimer();
+  double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
+  data.evalError(bodies, bodies2, diff1, norm1, diff2, norm2);
+  MPI_Reduce(&diff1, &diff3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&norm1, &norm3, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&diff2, &diff4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&norm2, &norm4, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+  if(args.verbose) {
+    logger.printError(diff3, norm3, diff4, norm4);
+    tree.printTreeData(cells);
+    traversal.printTraversalData();
+    logger.printPAPI();
   }
 
 #if VTK

File examples/serial.cxx

   Cells cells, jcells;
   Dataset data;
   Logger logger;
-  logger.printTitle("Parameters");
-  args.print(logger.stringLength,P);
 
   const real_t cycle = 2 * M_PI;
   BoundBox boundbox(args.NSPAWN);
   BuildTree tree(args.NCRIT,args.NSPAWN);
   UpDownPass pass(args.THETA);
   Traversal traversal(args.NSPAWN,args.IMAGES);
-  logger.printNow = true;
-  boundbox.printNow = true;
-  tree.printNow = true;
-  pass.printNow = true;
-  traversal.printNow = true;
-  traversal.RHO = pass.RHO = 5;
+  logger.verbose = true;
+  if (args.verbose) {
+    boundbox.verbose = true;
+    tree.verbose = true;
+    pass.verbose = true;
+    traversal.verbose = true;
+    logger.printTitle("Parameters");
+  }
+  args.print(logger.stringLength,P);
 #if AUTO
   traversal.timeKernels();
 #endif
 #pragma omp parallel
 #pragma omp master
 #endif
-#ifdef MANY
-  for (int it=0; it<25; it++) {
-    int numBodies = int(std::pow(10,(it+24)/8.0));
+  if(args.verbose) logger.printTitle("Profiling");
+  bodies.resize(args.numBodies);
+  data.initBodies(bodies, args.distribution);
+  Bounds bounds = boundbox.getBounds(bodies);
+#if IneJ
+  jbodies.resize(args.numBodies);
+  data.initBodies(jbodies, args.distribution);
+  bounds = boundbox.getBounds(jbodies,bounds);
+#endif
+  Box box = boundbox.bounds2box(bounds);
+  tree.buildTree(bodies, cells, box);                         // TODO : make it work without this
+#if IneJ
+  tree.buildTree(jbodies, jcells, box);                       // TODO : make it work without this
+#endif
+  tree.resetTimer();
+  logger.startTimer("Total FMM");
+  logger.startPAPI();
+  tree.buildTree(bodies, cells, box);
+  pass.upwardPass(cells);
+#if IneJ
+  tree.buildTree(jbodies, jcells, box);
+  pass.upwardPass(jcells);
+  traversal.dualTreeTraversal(cells, jcells, cycle, args.mutual);
 #else
-  {
-    int numBodies = args.numBodies;
-#endif // MANY
-    logger.printTitle("Profiling");
-    bodies.resize(numBodies);
-    data.initBodies(bodies, args.distribution);
-    Bounds bounds = boundbox.getBounds(bodies);
-    Box box = boundbox.bounds2box(bounds);
-    tree.buildTree(bodies, cells, box);                         //TODO : make it work without this
-    tree.resetTimer();
-    logger.startTimer("Total FMM");
-    tree.buildTree(bodies, cells, box);
-    pass.upwardPass(cells);
-    logger.startPAPI();
-    traversal.dualTreeTraversal(cells, cells, cycle, args.mutual);
-    logger.stopPAPI();
-    pass.downwardPass(cells);
-    logger.printTitle("Total runtime");
-    logger.stopTimer("Total FMM", logger.printNow);
-    boundbox.writeTime();
-    tree.writeTime();
-    pass.writeTime();
-    traversal.writeTime();
-    boundbox.resetTimer();
-    tree.resetTimer();
-    pass.resetTimer();
-    traversal.resetTimer();
-    logger.resetTimer();
-    jbodies = bodies;
-    if (int(bodies.size()) > args.numTarget) data.sampleBodies(bodies, args.numTarget);
-    Bodies bodies2 = bodies;
-    data.initTarget(bodies2);
-    logger.startTimer("Total Direct");
-    traversal.direct(bodies2, jbodies, cycle);
-    traversal.normalize(bodies2);
-    logger.stopTimer("Total Direct", logger.printNow);
-    double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0;
-    data.evalError(bodies, bodies2, diff1, norm1, diff2, norm2);
-    if (logger.printNow) {
-      logger.printError(diff1, norm1, diff2, norm2);
-      tree.printTreeData(cells);
-      traversal.printTraversalData();
-      logger.printPAPI();
-    }
+  traversal.dualTreeTraversal(cells, cells, cycle, args.mutual);
+  jbodies = bodies;
+#endif
+  pass.downwardPass(cells);
+  if (args.verbose) logger.printTitle("Total runtime");
+  logger.stopPAPI();
+  logger.stopTimer("Total FMM", logger.verbose);
+  boundbox.writeTime();
+  tree.writeTime();
+  pass.writeTime();
+  traversal.writeTime();
+  boundbox.resetTimer();
+  tree.resetTimer();
+  pass.resetTimer();
+  traversal.resetTimer();
+  logger.resetTimer();
+  if (int(bodies.size()) > args.numTarget) data.sampleBodies(bodies, args.numTarget);
+  Bodies bodies2 = bodies;
+  data.initTarget(bodies2);
+  logger.startTimer("Total Direct");
+  traversal.direct(bodies2, jbodies, cycle);
+  traversal.normalize(bodies2);
+  logger.stopTimer("Total Direct", args.verbose);
+  double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0;
+  data.evalError(bodies, bodies2, diff1, norm1, diff2, norm2);
+  if (args.verbose) {
+    logger.printError(diff1, norm1, diff2, norm2);
+    tree.printTreeData(cells);
+    traversal.printTraversalData();
+    logger.printPAPI();
   }
 #if VTK
   for (B_iter B=jbodies.begin(); B!=jbodies.end(); B++) B->ICELL = 0;
-  for (C_iter C=cells.begin(); C!=cells.end(); C++) {
+  for (C_iter C=jcells.begin(); C!=jcells.end(); C++) {
     Body body;
     body.ICELL = 1;
     body.X     = C->X;

File include/args.h

   {"images",       1, 0, 'i'},
   {"theta",        1, 0, 'o'},
   {"mutual",       1, 0, 'm'},
+  {"verbose",      1, 0, 'v'},
   {"distribution", 1, 0, 'd'},
   {"help",         0, 0, 'h'},
   {0, 0, 0, 0}
   int IMAGES;
   double THETA;
   int mutual;
+  int verbose;
   const char * distribution;
 
  private:
             " --nspawn : Threshold for splitting both cells during recursion (%d)\n"
             " --images : Number of periodic image levels (%d)\n"
             " --theta : Multipole acceptance criterion (%f)\n"
-            " --mutual [0/1] :  use mutual interaction (%d)\n"
+            " --mutual [0/1] : Use mutual interaction (%d)\n"
+	    " --verbose [0/1] : Print information to screen (%d)\n"
             " --distribution [l/c/s/p] : lattice, cube, sphere, plummer (%s)\n"
             " --help : Show this help document\n",
             name,
             IMAGES,
             THETA,
             mutual,
+	    verbose,
             distribution);
   }
 
 
  public:
   Args(int argc, char ** argv) : numBodies(1000000), numTarget(100), NCRIT(16), NSPAWN(1000), IMAGES(0),
-                                 THETA(.6), mutual(1), distribution("cube") {
+    THETA(.6), mutual(1), verbose(1), distribution("cube") {
     while (1) {
       int option_index;
       int c = getopt_long(argc, argv, "", long_options, &option_index);
       case 'm':
         mutual = atoi(optarg);
         break;
+      case 'v':
+	verbose= atoi(optarg);
+	break;
       case 'd':
         distribution = parse(optarg);
         break;
   }
 
   void print(int stringLength, int P) {
-    std::cout << std::setw(stringLength) << std::left           // Set format
-	      << "numBodies" << " : " << numBodies << std::endl // Print numBodies  
-              << std::setw(stringLength)                        // Set format
-              << "P" << " : " << P << std::endl                 // Print P
-              << std::setw(stringLength)                        // Set format
-              << "THETA" << " : " << THETA << std::endl         // Print THETA
-              << std::setw(stringLength)                        // Set format
-              << "NCRIT" << " : " << NCRIT << std::endl         // Print NCRIT
-              << std::setw(stringLength)                        // Set format
-              << "NSPAWN" << " : " << NSPAWN << std::endl       // Print NSPAWN
-              << std::setw(stringLength)                        // Set format
-              << "IMAGES" << " : " << IMAGES << std::endl       // Print IMAGES
-              << std::setw(stringLength)                        // Set format
-              << "mutual" << " : " << mutual << std::endl       // Print mutual
-              << std::setw(stringLength)                        // Set format
-              << "distribution" << " : " << distribution << std::endl;// Print distribution
+    if (verbose) {
+      std::cout << std::setw(stringLength) << std::left         // Set format
+		<< "numBodies" << " : " << numBodies << std::endl // Print numBodies  
+		<< std::setw(stringLength)                      // Set format
+		<< "P" << " : " << P << std::endl               // Print P
+		<< std::setw(stringLength)                      // Set format
+		<< "THETA" << " : " << THETA << std::endl       // Print THETA
+		<< std::setw(stringLength)                      // Set format
+		<< "NCRIT" << " : " << NCRIT << std::endl       // Print NCRIT
+		<< std::setw(stringLength)                      // Set format
+		<< "NSPAWN" << " : " << NSPAWN << std::endl     // Print NSPAWN
+		<< std::setw(stringLength)                      // Set format
+		<< "IMAGES" << " : " << IMAGES << std::endl     // Print IMAGES
+		<< std::setw(stringLength)                      // Set format
+		<< "mutual" << " : " << mutual << std::endl     // Print mutual
+		<< std::setw(stringLength)                      // Set format
+		<< "verbose" << " : " << verbose << std::endl   // Print verbose
+		<< std::setw(stringLength)                      // Set format
+		<< "distribution" << " : " << distribution << std::endl;// Print distribution
+    } else {
+      std::cout << std::setw(stringLength) << std::left         // Set format
+		<< "numBodies" << " : " << numBodies << std::endl; // Print numBodies  
+    }
   }
 };
 #endif

File include/boundbox.h

   int NSPAWN;                                                   //!< Threshold of NDBODY for spawning new threads
 
 //! Recursively get Xmin and Xmax of domain
-  Bounds boundsRecursion(B_iter BiBegin, B_iter BiEnd) {
+  Bounds boundsRecursion(B_iter BiBegin, B_iter BiEnd, Bounds bounds) {
     assert(BiEnd - BiBegin > 0);
     if (BiEnd - BiBegin < NSPAWN) {                             // If number of elements is small enough
-      Bounds bounds;
-      bounds.Xmin = BiBegin->X;                                 //  Initialize Xmin with first value
-      bounds.Xmax = BiBegin->X;                                 //  Initialize Xmax with first value
       for (B_iter B=BiBegin; B!=BiEnd; B++) {                   //  Loop over range of bodies
         bounds.Xmin = min(B->X, bounds.Xmin);                   //   Update Xmin
         bounds.Xmax = max(B->X, bounds.Xmax);                   //   Update Xmax
       return bounds;                                            //  Return Xmin and Xmax as pair
     } else {                                                    // Else if number of elements are large
       B_iter BiMid = BiBegin + (BiEnd - BiBegin) / 2;           //  Middle iterator
-      Bounds bounds0, bounds1;                                  //  Pair : first Xmin : second Xmax
       spawn_tasks {                                             //  Initialize tasks
-	spawn_task1(bounds0, bounds0 = boundsRecursion(BiBegin, BiMid));//  Recursive call with new task
-	bounds1 = boundsRecursion(BiMid, BiEnd);                //  Recursive call with old task
+	spawn_task1(bounds, bounds = boundsRecursion(BiBegin, BiMid, bounds));// Recursive call with new task
+	Bounds bounds2 = boundsRecursion(BiMid, BiEnd, bounds); //  Recursive call with old task
 	sync_tasks;                                             //  Synchronize tasks
-	bounds0.Xmin = min(bounds0.Xmin, bounds1.Xmin);         //  Minimum of the two Xmins
-	bounds0.Xmax = max(bounds0.Xmax, bounds1.Xmax);         //  Maximum of the two Xmaxs
+	bounds.Xmin = min(bounds.Xmin, bounds2.Xmin);           //  Minimum of the two Xmins
+	bounds.Xmax = max(bounds.Xmax, bounds2.Xmax);           //  Maximum of the two Xmaxs
       }
-      return bounds0;                                           //  Return Xmin and Xmax
+      return bounds;                                            //  Return Xmin and Xmax
     }                                                           // End if for number fo elements
   }
 
   // ! Get Xmin and Xmax of domain
   Bounds getBounds(Bodies bodies) {
     startTimer("Get bounds");                                   // Start timer
-    Bounds bounds = boundsRecursion(bodies.begin(),bodies.end());// Recursive call for bounds calculation
-    stopTimer("Get bounds",printNow);                           // Stop timer
+    Bounds bounds;                                              // Bounds : Contains Xmin, Xmax
+    bounds.Xmin = bounds.Xmax = bodies.front().X;               // Initialize Xmin, Xmax
+    bounds = boundsRecursion(bodies.begin(),bodies.end(),bounds);// Recursive call for bounds calculation
+    stopTimer("Get bounds",verbose);                            // Stop timer
+    return bounds;                                              // Return Xmin and Xmax
+  }
+
+  // ! Update Xmin and Xmax of domain
+  Bounds getBounds(Bodies bodies, Bounds bounds) {
+    startTimer("Get bounds");                                   // Start timer
+    bounds = boundsRecursion(bodies.begin(),bodies.end(),bounds);// Recursive call for bounds calculation
+    stopTimer("Get bounds",verbose);                            // Stop timer
     return bounds;                                              // Return Xmin and Xmax
   }
 

File include/buildtree.h

     binNode->END = binNode->BEGIN + maxBinNode;                 // Set end pointer
     N0 = buildNodes(bodies, buffer, 0, bodies.size(), binNode, X0, R0);// Build tree recursively
     delete[] binNode->BEGIN;                                    // Deallocate binary tree array
-    stopTimer("Grow tree",printNow);                            // Stop timer
+    stopTimer("Grow tree",verbose);                             // Stop timer
   }
 
 //! Link tree structure
     C_iter C0 = cells.begin();                                  // Cell begin iterator
     nodes2cells(N0, C0, C0, C0+1, R0);                          // Convert nodes to cells recursively
     delete N0;                                                  // Deallocate nodes
-    stopTimer("Link tree",printNow);                            // Stop timer
+    stopTimer("Link tree",verbose);                             // Stop timer
   }
 
  public:

File include/localessentialtree.h

       }                                                         //  Endif for current rank
       sendCellCount[IRANK] = sendCells.size() - sendCellDispl[IRANK];// Send count for IRANK
     }                                                           // End loop over ranks
-    stopTimer("Set LET",printNow);                              // Stop timer
+    stopTimer("Set LET",verbose);                               // Stop timer
   }
 
 //! Get local essential tree from rank "irank".
     }                                                           // End loop over receive cells
     cells.resize(recvCellCount[irank]);                         // Resize cell vector for LET
     cells.assign(recvCells.begin()+recvCellDispl[irank],recvCells.begin()+recvCellDispl[irank]+recvCellCount[irank]);
-    stopTimer("Get LET",printNow);                              // Stop timer
+    stopTimer("Get LET",verbose);                               // Stop timer
   }
 
 //! Send bodies
     startTimer("Comm bodies");                                  // Start timer
     alltoall(sendBodies);                                       // Send body count
     alltoallv(sendBodies);                                      // Send bodies
-    stopTimer("Comm bodies",printNow);                          // Stop timer
+    stopTimer("Comm bodies",verbose);                           // Stop timer
     return recvBodies;                                          // Return received bodies
   }
 
     startTimer("Comm bodies");                                  // Start timer
     alltoall(bodies);                                           // Send body count
     alltoallv(bodies);                                          // Send bodies
-    stopTimer("Comm bodies",printNow);                          // Stop timer
+    stopTimer("Comm bodies",verbose);                           // Stop timer
     return recvBodies;                                          // Return received bodies
   }
 
     startTimer("Comm cells");                                   // Start timer
     alltoall(sendCells);                                        // Send cell count
     alltoallv(sendCells);                                       // Senc cells
-    stopTimer("Comm cells",printNow);                           // Stop timer
+    stopTimer("Comm cells",verbose);                            // Stop timer
   }
 };
 #endif

File include/logger.h

  public:
   int stringLength;                                             //!< Max length of event name
   int decimal;                                                  //!< Decimal precision
-  bool printNow;                                                //!< Switch to print timings
+  bool verbose;                                                 //!< Print to screen
 
  private:
 //! Timer function
 #endif
     stringLength(20),                                           // Max length of event name
     decimal(7),                                                 // Decimal precision
-    printNow(false) {                                           // Don't print timings by default
+    verbose(false) {                                            // Don't print timings by default
     pthread_mutex_init(&mutex,NULL);                            // Initialize pthread communicator
   }
 //! Destructor

File include/partition.h

       assert(0 <= B->IPROC && B->IPROC < MPISIZE);
       B->ICELL = B->IPROC;                                      //  Do this to sort accroding to IPROC
     }                                                           // End loop over bodies
-    stopTimer("Partition",printNow);                            // Stop timer
+    stopTimer("Partition",verbose);                             // Stop timer
     return local;
   }
 
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       B->ICELL = B->IPROC;                                      //  Do this to sortaccroding to IPROC
     }                                                           // End loop over bodies
-    stopTimer("Unpartition", printNow);                         // Stop timer
+    stopTimer("Unpartition",verbose);                           // Stop timer
   }
 };
 #endif

File include/traversal.h

       cycle *= 3;                                               //  Increase center cell size three times
       Cj0 = C0;                                                 //  Reset Cj0 back
     }                                                           // End loop over sublevels of tree
-    stopTimer("Traverse periodic",printNow);                    // Stop timer
+    stopTimer("Traverse periodic",verbose);                     // Stop timer
   }
 
  protected:
       }                                                         //  End loop over x periodic direction
       traversePeriodic(cycle);                                  //  Traverse tree for periodic images
     }                                                           // End if for periodic boundary condition
-    stopTimer("Traverse",printNow);                             // Stop timer
+    stopTimer("Traverse",verbose);                              // Stop timer
     writeTrace();
   }
 

File include/updownpass.h

         C->RCRIT *= 10;                                         //   Prevent approximation
       }                                                         //  End loop over top 2 levels of cells
     }                                                           // End if for tree levels
-    stopTimer("Upward pass",printNow);                          // Stop timer
+    stopTimer("Upward pass",verbose);                           // Stop timer
   }
 
 //! Downward pass (L2L, L2P)
       }                                                         // End loop over child cells
       sync_tasks;                                               // Synchronize tasks
     }
-    stopTimer("Downward pass",printNow);                        // Stop timer
+    stopTimer("Downward pass",verbose);                         // Stop timer
   }
 };
 #endif

File wrappers/laplace.cxx

   UpDownPass pass(THETA);
   Traversal traversal(NSPAWN,IMAGES);
   LocalEssentialTree LET(IMAGES);
-  boundbox.printNow = LET.MPIRANK == 0;
-  tree.printNow = LET.MPIRANK == 0;
-  pass.printNow = LET.MPIRANK == 0;
-  traversal.printNow = LET.MPIRANK == 0;
-  LET.printNow = LET.MPIRANK == 0;
+  boundbox.verbose = LET.MPIRANK == 0;
+  tree.verbose = LET.MPIRANK == 0;
+  pass.verbose = LET.MPIRANK == 0;
+  traversal.verbose = LET.MPIRANK == 0;
+  LET.verbose = LET.MPIRANK == 0;
 
   bodies.resize(n);
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {

File wrappers/matern.cxx

 #include "updownpass.h"
 #include "localessentialtree.h"
 
-extern "C" void FMM(int n, double* x, double* q, double *p, double nu, double rho) {
-  Bodies bodies, jbodies;
+extern "C" void FMM(int ni, double* xi, double *pi, int nj, double *xj, double * qj, double nu, double rho) {
+  Bodies bodies(ni), jbodies(nj);
   Cells cells, jcells;
   Sort sort;
 
   UpDownPass pass(THETA);
   Traversal traversal(NSPAWN,IMAGES);
   LocalEssentialTree LET(IMAGES);
-  boundbox.printNow = LET.MPIRANK == 0;
-  tree.printNow = LET.MPIRANK == 0;
-  pass.printNow = LET.MPIRANK == 0;
-  traversal.printNow = LET.MPIRANK == 0;
-  LET.printNow = LET.MPIRANK == 0;
+  boundbox.verbose = LET.MPIRANK == 0;
+  tree.verbose = LET.MPIRANK == 0;
+  pass.verbose = LET.MPIRANK == 0;
+  traversal.verbose = LET.MPIRANK == 0;
+  LET.verbose = LET.MPIRANK == 0;
   traversal.NU = pass.NU = nu;
   traversal.RHO = pass.RHO = rho;
 
-  bodies.resize(n);
   for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {
     int i = B-bodies.begin();
-    B->X[0] = x[3*i+0];
-    B->X[1] = x[3*i+1];
-    B->X[2] = x[3*i+2];
-    B->SRC  = q[i];
-    B->TRG[0] =  p[i];
+    B->X[0] = xi[3*i+0];
+    B->X[1] = xi[3*i+1];
+    B->X[2] = xi[3*i+2];
+    B->TRG[0] =  pi[i];
+    B->IBODY  = i;
+  }
+
+  for (B_iter B=jbodies.begin(); B!=jbodies.end(); B++) {
+    int i = B-jbodies.begin();
+    B->X[0] = xj[3*i+0];
+    B->X[1] = xj[3*i+1];
+    B->X[2] = xj[3*i+2];
+    B->SRC =  qj[i];
     B->IBODY  = i;
   }