Commits

Rio Yokota committed e6ec3f5

parallel.cxx working with new code but not for all cases.

Comments (0)

Files changed (11)

 #LFLAGS	+= -std=c++0x -DTBB -ltbb
 
 ### MassiveThreads flags (doesn't work with OpenMP) : MassiveThreads is available from http://code.google.com/p/massivethreads/
-LFLAGS	+= -std=c++0x -DMTHREAD -lmyth
+#LFLAGS	+= -std=c++0x -DMTHREAD -lmyth
 
 ### PAPI flags
-LFLAGS	+= -DPAPI -lpapi
+#LFLAGS	+= -DPAPI -lpapi
 
 ### VTK flags : VTK is available from http://www.vtk.org/VTK/resources/software.html
 #CXX	+= -I$(VTK_INCLUDE_PATH)

examples/Makefile

 
 parallel: parallel.cxx $(KERNELS)
 	$(CXX) $? $(LFLAGS) $(VFLAGS)
-	mpirun -np 32 ./a.out
+	mpirun -np 4 ./a.out
 
 wrapper: wrapper.cxx $(KERNELS)
 	make -C ../wrapper libcoulomb.a

examples/parallel.cxx

+#include "args.h"
 #include "dataset.h"
 #include "parallelfmm.h"
 #ifdef VTK
 #include "vtk.h"
 #endif
 
-int main() {
-  int numBodies = 1000;
-  IMAGES = 0;
-  THETA = 0.6;
+int main(int argc, char ** argv) {
+  Args ARGS(argc, argv);
   Bodies bodies, jbodies;
   Cells cells, jcells;
   Dataset DATA;
   ParallelFMM FMM;
+  FMM.NCRIT = ARGS.NCRIT;
+  FMM.NSPAWN = ARGS.NSPAWN;
+  FMM.IMAGES = ARGS.IMAGES;
+  FMM.THETA = ARGS.THETA;
   FMM.printNow = FMM.MPIRANK == 0;
 #if AUTO
   FMM.timeKernels();
 #endif
 #ifdef MANY
   for ( int it=0; it<25; it++ ) {
-  numBodies = int(pow(10,(it+24)/8.0));
+    int numBodies = int(pow(10,(it+24)/8.0));
 #else
   {
-  numBodies = 1000000 / FMM.MPISIZE;
+    int numBodies = ARGS.numBodies / FMM.MPISIZE;
 #endif
-  if(FMM.printNow) std::cout << "N                    : " << numBodies << std::endl;
-  bodies.resize(numBodies);
-  DATA.initBodies(bodies,FMM.MPIRANK,FMM.MPISIZE);
-  FMM.startTimer("FMM");
+    if(FMM.printNow) std::cout << std::endl
+      << "N                    : " << numBodies << std::endl;
+    bodies.resize(numBodies);
+    DATA.initBodies(bodies, ARGS.distribution, FMM.MPIRANK, FMM.MPISIZE);
+    FMM.startTimer("FMM");
 
-  FMM.partition(bodies);
-  FMM.setBounds(bodies);
-  FMM.buildTree(bodies,cells);
-  FMM.upwardPass(cells);
-  FMM.startPAPI();
+    FMM.partition(bodies);
+    FMM.setBounds(bodies);
+    FMM.buildTree(bodies,cells);
+    FMM.upwardPass(cells);
+    FMM.startPAPI();
 
 #if 1 // Set to 0 for debugging by shifting bodies and reconstructing tree : Step 1
-  FMM.setLET(cells);
-  FMM.commBodies();
-  FMM.commCells();
-  FMM.evaluate(cells,cells);
-  jbodies = bodies;
-  for( int irank=1; irank<FMM.MPISIZE; irank++ ) {
-    FMM.getLET(jcells,(FMM.MPIRANK+irank)%FMM.MPISIZE);
+    FMM.setLET(cells);
+    FMM.commBodies();
+    FMM.commCells();
+    FMM.evaluate(cells,cells);
+    jbodies = bodies;
+    for( int irank=1; irank<FMM.MPISIZE; irank++ ) {
+      FMM.getLET(jcells,(FMM.MPIRANK+irank)%FMM.MPISIZE);
 
 #if 0 // Set to 1 for debugging full LET communication : Step 2 (LET must be set to full tree)
-    FMM.shiftBodies(jbodies); // This will overwrite recvBodies. (define recvBodies2 in partition.h to avoid this)
-    Cells icells;
-    FMM.buildTree(jbodies,icells);
-    FMM.upwardPass(icells);
-    assert( icells.size() == jcells.size() );
-    CellQueue Qi, Qj;
-    Qi.push(icells.begin());
-    Qj.push(jcells.begin());
-    int ic=0;
-    while( !Qi.empty() ) {
-      C_iter Ci=Qi.front(); Qi.pop();
-      C_iter Cj=Qj.front(); Qj.pop();
-      if( Ci->ICELL != Cj->ICELL ) {
-        std::cout << FMM.MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
-        break;
+      FMM.shiftBodies(jbodies); // This will overwrite recvBodies. (define recvBodies2 in partition.h to avoid this)
+      Cells icells;
+      FMM.buildTree(jbodies,icells);
+      FMM.upwardPass(icells);
+      assert( icells.size() == jcells.size() );
+      CellQueue Qi, Qj;
+      Qi.push(icells.begin());
+      Qj.push(jcells.begin());
+      int ic=0;
+      while( !Qi.empty() ) {
+        C_iter Ci=Qi.front(); Qi.pop();
+        C_iter Cj=Qj.front(); Qj.pop();
+        if( Ci->ICELL != Cj->ICELL ) {
+          std::cout << FMM.MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
+          break;
+        }
+        if( Ci->NCHILD != Cj->NCHILD ) {
+          std::cout << FMM.MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
+          break;
+        }
+        if( Ci->NCLEAF != Cj->NCLEAF ) {
+          std::cout << FMM.MPIRANK << " NCLEAF : " << Ci->NCLEAF << " " << Cj->NCLEAF << std::endl;
+          break;
+        }
+        real_t sumi = 0, sumj = 0;
+        if( Ci->NCLEAF != 0 ) {
+          for( int i=0; i<Ci->NCLEAF; i++ ) {
+            B_iter Bi = Ci->LEAF+i;
+            B_iter Bj = Cj->LEAF+i;
+            sumi += Bi->X[0];
+            sumj += Bj->X[0];
+          }
+        }
+        if( fabs(sumi-sumj)/fabs(sumi) > 1e-6 ) std::cout << FMM.MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
+        assert( fabs(sumi-sumj)/fabs(sumi) < 1e-6 );
+        for( int i=0; i<Ci->NCHILD; i++ ) Qi.push(icells.begin()+Ci->CHILD+i);
+        for( int i=0; i<Cj->NCHILD; i++ ) Qj.push(jcells.begin()+Cj->CHILD+i);
+        ic++;
       }
-      if( Ci->NCHILD != Cj->NCHILD ) {
-        std::cout << FMM.MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
-        break;
-      }
-      if( Ci->NCLEAF != Cj->NCLEAF ) {
-        std::cout << FMM.MPIRANK << " NCLEAF : " << Ci->NCLEAF << " " << Cj->NCLEAF << std::endl;
-        break;
-      }
-      real_t sumi = 0, sumj = 0;
-      if( Ci->NCLEAF != 0 ) {
-        for( int i=0; i<Ci->NCLEAF; i++ ) {
-          B_iter Bi = Ci->LEAF+i;
-          B_iter Bj = Cj->LEAF+i;
-          sumi += Bi->X[0];
-          sumj += Bj->X[0];
-        }
-      }
-      if( fabs(sumi-sumj)/fabs(sumi) > 1e-6 ) std::cout << FMM.MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
-      assert( fabs(sumi-sumj)/fabs(sumi) < 1e-6 );
-      for( int i=0; i<Ci->NCHILD; i++ ) Qi.push(icells.begin()+Ci->CHILD+i);
-      for( int i=0; i<Cj->NCHILD; i++ ) Qj.push(jcells.begin()+Cj->CHILD+i);
-      ic++;
+      assert( ic == int(icells.size()) );
+#endif
+      FMM.evaluate(cells,jcells);
     }
-    assert( ic == int(icells.size()) );
-#endif
-    FMM.evaluate(cells,jcells);
-  }
 #else
-  jbodies = bodies;
-  for( int irank=0; irank!=FMM.MPISIZE; irank++ ) {
-    FMM.shiftBodies(jbodies);
-    jcells.clear();
-    FMM.setBounds(jbodies);
-    FMM.buildTree(jbodies,jcells);
-    FMM.upwardPass(jcells);
-    FMM.evaluate(cells,jcells);
-  }
+    jbodies = bodies;
+    for( int irank=0; irank!=FMM.MPISIZE; irank++ ) {
+      FMM.shiftBodies(jbodies);
+      jcells.clear();
+      FMM.setBounds(jbodies);
+      FMM.buildTree(jbodies,jcells);
+      FMM.upwardPass(jcells);
+      FMM.evaluate(cells,jcells);
+    }
 #endif
 
-  FMM.downwardPass(cells);
+    FMM.downwardPass(cells);
 
-  FMM.stopPAPI();
-  FMM.stopTimer("FMM",FMM.printNow);
-  FMM.eraseTimer("FMM");
-  FMM.writeTime();
-  FMM.resetTimer();
+    FMM.stopPAPI();
+    FMM.stopTimer("FMM",FMM.printNow);
+    FMM.eraseTimer("FMM");
+    FMM.writeTime();
+    FMM.resetTimer();
 
-  jbodies = bodies;
-  if (bodies.size() > 100) bodies.resize(100);
-  Bodies bodies2 = bodies;
-  DATA.initTarget(bodies2);
-  FMM.startTimer("Direct sum");
-  for( int i=0; i!=FMM.MPISIZE; ++i ) {
-    FMM.shiftBodies(jbodies);
-    FMM.direct(bodies2,jbodies);
-    if(FMM.printNow) std::cout << "Direct loop          : " << i+1 << "/" << FMM.MPISIZE << std::endl;
-  }
-  FMM.normalize(bodies2);
-  FMM.stopTimer("Direct sum",FMM.printNow);
-  FMM.eraseTimer("Direct sum");
-  real_t diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
-  DATA.evalError(bodies,bodies2,diff1,norm1,diff2,norm2);
-  MPI_Datatype MPI_TYPE = FMM.getType(diff1);
-  MPI_Reduce(&diff1,&diff3,1,MPI_TYPE,MPI_SUM,0,MPI_COMM_WORLD);
-  MPI_Reduce(&norm1,&norm3,1,MPI_TYPE,MPI_SUM,0,MPI_COMM_WORLD);
-  MPI_Reduce(&diff2,&diff4,1,MPI_TYPE,MPI_SUM,0,MPI_COMM_WORLD);
-  MPI_Reduce(&norm2,&norm4,1,MPI_TYPE,MPI_SUM,0,MPI_COMM_WORLD);
-  if(FMM.printNow) DATA.printError(diff3,norm3,diff4,norm4);
+    jbodies = bodies;
+    if (bodies.size() > 100) bodies.resize(100);
+    Bodies bodies2 = bodies;
+    DATA.initTarget(bodies2);
+    FMM.startTimer("Direct sum");
+    for( int i=0; i!=FMM.MPISIZE; ++i ) {
+      FMM.shiftBodies(jbodies);
+      FMM.direct(bodies2,jbodies);
+      if(FMM.printNow) std::cout << "Direct loop          : " << i+1 << "/" << FMM.MPISIZE << std::endl;
+    }
+    FMM.normalize(bodies2);
+    FMM.stopTimer("Direct sum",FMM.printNow);
+    FMM.eraseTimer("Direct sum");
+    double diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0, diff3 = 0, norm3 = 0, diff4 = 0, norm4 = 0;
+    DATA.evalError(bodies,bodies2,diff1,norm1,diff2,norm2);
+    MPI_Reduce(&diff1,&diff3,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
+    MPI_Reduce(&norm1,&norm3,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
+    MPI_Reduce(&diff2,&diff4,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
+    MPI_Reduce(&norm2,&norm4,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
+    if(FMM.printNow) DATA.printError(diff3,norm3,diff4,norm4);
   }
 
 #ifdef VTK

examples/serial.cxx

 #endif
 
 int main(int argc, char ** argv) {
-  Args ARGS(argc,argv);
+  Args ARGS(argc, argv);
   Bodies bodies, jbodies;
   Cells cells, jcells;
   Dataset DATA;
     bodies.resize(numBodies);
     DATA.initBodies(bodies, ARGS.distribution);
     FMM.setBounds(bodies);
-    FMM.buildTree(bodies,cells);                                // TODO : make it work without this
+    FMM.buildTree(bodies, cells);                               // TODO : make it work without this
     FMM.resetTimer();
     FMM.startTimer("FMM");
-    FMM.buildTree(bodies,cells);
+    FMM.buildTree(bodies, cells);
     FMM.upwardPass(cells);
-    if (!ARGS.buildOnly) {
-      FMM.startPAPI();
-      FMM.evaluate(cells,cells,ARGS.mutual);
-      FMM.stopPAPI();
-      FMM.downwardPass(cells);
-    }
-    FMM.stopTimer("FMM",FMM.printNow);
+    FMM.startPAPI();
+    FMM.evaluate(cells,cells,ARGS.mutual);
+    FMM.stopPAPI();
+    FMM.downwardPass(cells);
+    FMM.stopTimer("FMM", FMM.printNow);
     FMM.eraseTimer("FMM");
     FMM.writeTime();
     FMM.resetTimer();
-    if (!ARGS.buildOnly) {
-      jbodies = bodies;
-      if (int(bodies.size()) > ARGS.numTarget) bodies.resize(ARGS.numTarget);
-      Bodies bodies2 = bodies;
-      DATA.initTarget(bodies2);
-      FMM.startTimer("Direct sum");
-      FMM.direct(bodies2,jbodies);
-      FMM.normalize(bodies2);
-      FMM.stopTimer("Direct sum",FMM.printNow);
-      FMM.eraseTimer("Direct sum");
-      real_t diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0;
-      DATA.evalError(bodies,bodies2,diff1,norm1,diff2,norm2);
-      if(FMM.printNow) DATA.printError(diff1,norm1,diff2,norm2);
-    }
+    jbodies = bodies;
+    if (int(bodies.size()) > ARGS.numTarget) bodies.resize(ARGS.numTarget);
+    Bodies bodies2 = bodies;
+    DATA.initTarget(bodies2);
+    FMM.startTimer("Direct sum");
+    FMM.direct(bodies2, jbodies);
+    FMM.normalize(bodies2);
+    FMM.stopTimer("Direct sum",FMM.printNow);
+    FMM.eraseTimer("Direct sum");
+    real_t diff1 = 0, norm1 = 0, diff2 = 0, norm2 = 0;
+    DATA.evalError(bodies, bodies2, diff1, norm1, diff2, norm2);
+    if(FMM.printNow) DATA.printError(diff1, norm1, diff2, norm2);
   }
 #ifdef VTK
   for( B_iter B=jbodies.begin(); B!=jbodies.end(); ++B ) B->ICELL = 0;
   int Ncell = 0;
   vtkPlot vtk;
   vtk.setDomain(M_PI,0);
-  vtk.setGroupOfPoints(jbodies,Ncell);
+  vtk.setGroupOfPoints(jbodies, Ncell);
   vtk.plot(Ncell);
 #endif
 }
   {"nspawn",       1, 0, 's'},
   {"images",       1, 0, 'i'},
   {"theta",        1, 0, 'h'},
-  {"buildOnly",    1, 0, 'b'},
   {"mutual",       1, 0, 'm'},
   {"distribution", 1, 0, 'd'},
   {0, 0, 0, 0}
   int NSPAWN;
   int IMAGES;
   double THETA;
-  int buildOnly;
   int mutual;
   const char * distribution;
 
             " --nspawn : Threshold for splitting both cells during recursion (%d)\n"
             " --images : Number of periodic image levels (%d)\n"
             " --theta : Multipole acceptance criterion (%f)\n"
-            " --buildOnly [0/1] : build tree and do not evaluate force (%d)\n"
             " --mutual [0/1] :  use mutual interaction (%d)\n"
             " --distribution [l/c/s/p] : lattice, cube, sphere, plummer (%s)\n",
             name,
             NSPAWN,
             IMAGES,
             THETA,
-            buildOnly,
             mutual,
             distribution);
   }
 
 public:
   Args(int argc, char ** argv) : numBodies(1000000), numTarget(100), NCRIT(10), NSPAWN(1000), IMAGES(0),
-                                 THETA(.6), buildOnly(0), mutual(1), distribution("cube") {
+                                 THETA(.6), mutual(1), distribution("cube") {
     while (1) {
       int option_index;
       int c = getopt_long(argc, argv, "", long_options, &option_index);
       case 'h':
         THETA = atof(optarg);
         break;
-      case 'b':
-        buildOnly = atoi(optarg);
-        break;
       case 'm':
         mutual = atoi(optarg);
         break;
         exit(0);
       }
     }
-    printf("numBodies: %d\n", numBodies);
-    printf("numTarget: %d\n", numTarget);
-    printf("ncrit: %d\n", NCRIT);
-    printf("nspawn: %d\n", NSPAWN);
-    printf("images: %d\n", IMAGES);
-    printf("theta: %f\n", THETA);
-    printf("buildOnly: %d\n", buildOnly);
-    printf("mutual: %d\n", mutual);
-    printf("distribution: %s\n", distribution);
   }
 };
 

include/dataset.h

 
 //! Evaluate relaitve L2 norm error
   void evalError(Bodies &bodies, Bodies &bodies2,
-                 real_t &diff1, real_t &norm1, real_t &diff2, real_t &norm2) {
+                 double &diff1, double &norm1, double &diff2, double &norm2) {
     B_iter B2 = bodies2.begin();                                // Set iterator for bodies2
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++, B2++) { // Loop over bodies & bodies2
       diff1 += (B->TRG[0] - B2->TRG[0]) * (B->TRG[0] - B2->TRG[0]);// Difference of potential
   }
 
 //! Print relative L2 norm error
-  void printError(real_t diff1, real_t norm1, real_t diff2, real_t norm2) {
+  void printError(double diff1, double norm1, double diff2, double norm2) {
     std::cout << std::setw(20) << std::left
               << "Rel. L2 Error (pot)" << " : " << std::sqrt(diff1/norm1) << std::endl;
     std::cout << std::setw(20) << std::left
     if (remainder > iSplit) end++;                              // Adjust the end counter for remainder
   }
 
-//! Get MPI data type
-  template<typename T>
-  MPI_Datatype getType(T object) {
-    MPI_Datatype type = MPI_BYTE;                               // MPI data type
-    if        (typeid(object) == typeid(char)) {                // If data type is char
-      type = MPI_CHAR;                                          //  use MPI_CHAR
-    } else if (typeid(object) == typeid(short)) {               // If data type is short
-      type = MPI_SHORT;                                         //  use MPI_SHORT
-    } else if (typeid(object) == typeid(int)) {                 // If data type is int
-      type = MPI_INT;                                           //  use MPI_INT
-    } else if (typeid(object) == typeid(long)) {                // If data type is long
-      type = MPI_LONG;                                          //  use MPI_LONG
-    } else if (typeid(object) == typeid(long long)) {           // If data type is long long
-      type = MPI_LONG_LONG;                                     //  use MPI_LONG_LONG
-    } else if (typeid(object) == typeid(unsigned char)) {       // If data type is unsigned char
-      type = MPI_UNSIGNED_CHAR;                                 //  use MPI_UNSIGNED_CHAR
-    } else if (typeid(object) == typeid(unsigned short)) {      // If data type is unsigned short
-      type = MPI_UNSIGNED_SHORT;                                //  use MPI_UNSIGNED_SHORT
-    } else if (typeid(object) == typeid(unsigned int)) {        // If data type is unsigned int
-      type = MPI_UNSIGNED;                                      //  use MPI_UNSIGNED
-    } else if (typeid(object) == typeid(unsigned long)) {       // If data type is unsigned long
-      type = MPI_UNSIGNED_LONG;                                 //  use MPI_UNSIGNED_LONG
-    } else if (typeid(object) == typeid(unsigned long long)) {  // If data type is unsigned long long
-      type = MPI_UNSIGNED_LONG_LONG;                            //  use MPI_UNSIGNED_LONG_LONG
-    } else if (typeid(object) == typeid(float)) {               // If data type is float
-      type = MPI_FLOAT;                                         //  use MPI_FLOAT
-    } else if (typeid(object) == typeid(double)) {              // If data type is double
-      type = MPI_DOUBLE;                                        //  use MPI_DOUBLE
-    } else if (typeid(object) == typeid(long double)) {         // If data type is long double
-      type = MPI_LONG_DOUBLE;                                   //  use MPI_LONG_DOUBLE
-    } else if (typeid(object) == typeid(std::complex<float>)) { // If data type is complex<float>
-      type = MPI_COMPLEX;                                       //  use MPI_COMPLEX
-    } else if (typeid(object) == typeid(std::complex<double>)) {// If data type is compelx<double>
-      type = MPI_DOUBLE_COMPLEX;                                //  use MPI_DOUBLE_COMPLEX
-    }                                                           // Endif for data type
-    return type;                                                // Return MPI data type
-  }
-
 //! Print a scalar value on all ranks
   template<typename T>
   void print(T data) {

include/partition.h

 protected:
   Bodies sendBodies;                                            //!< Send buffer for bodies
   Bodies recvBodies;                                            //!< Receive buffer for bodies
-  vec3 * rankXmin;                                              //!< Array for minimum of local domains
-  vec3 * rankXmax;                                              //!< Array for maximum of local domains
+  fvec3 * rankXmin;                                             //!< Array for minimum of local domains
+  fvec3 * rankXmax;                                             //!< Array for maximum of local domains
   int * sendBodyCount;                                          //!< Send count
   int * sendBodyDispl;                                          //!< Send displacement
   int * recvBodyCount;                                          //!< Receive count
 private:
 //! Allreduce bounds from all ranks
   void allreduceBounds() {
-    MPI_Datatype MPI_TYPE = getType(localXmin[0]);              // Get MPI data type
-    MPI_Allreduce(localXmin,globalXmin,3,MPI_TYPE,MPI_MIN,MPI_COMM_WORLD);// Reduce domain Xmin
-    MPI_Allreduce(localXmax,globalXmax,3,MPI_TYPE,MPI_MAX,MPI_COMM_WORLD);// Reduce domain Xmax
+    MPI_Allreduce(localXmin,globalXmin,3,MPI_FLOAT,MPI_MIN,MPI_COMM_WORLD);// Reduce domain Xmin
+    MPI_Allreduce(localXmax,globalXmax,3,MPI_FLOAT,MPI_MAX,MPI_COMM_WORLD);// Reduce domain Xmax
     globalRadius = 0;                                           // Initialize global radius
     globalCenter = (globalXmax + globalXmin) / 2;               //  Calculate global center
     for (int d=0; d<3; d++) {                                   // Loop over dimensions
 
 //! Allgather bounds of all partitions
   void allgatherBounds() {
-    MPI_Datatype MPI_TYPE = getType(localXmin[0]);              // Get MPI data type
-    MPI_Allgather(localXmin,3,MPI_TYPE,&rankXmin[0],3,MPI_TYPE,MPI_COMM_WORLD);// Gather all domain bounds
-    MPI_Allgather(localXmax,3,MPI_TYPE,&rankXmax[0],3,MPI_TYPE,MPI_COMM_WORLD);// Gather all domain bounds
+    MPI_Allgather(localXmin,3,MPI_FLOAT,&rankXmin[0],3,MPI_FLOAT,MPI_COMM_WORLD);// Gather all domain bounds
+    MPI_Allgather(localXmax,3,MPI_FLOAT,&rankXmax[0],3,MPI_FLOAT,MPI_COMM_WORLD);// Gather all domain bounds
   }
 
 //! Set partition of global domain

include/serialfmm.h

 class SerialFMM : public TreeBuilder {
 protected:
   real_t globalRadius;                                          //!< Radius of global root cell
-  vec3 globalCenter;                                            //!< Center of global root cell
-  vec3 globalXmin;                                              //!< Global Xmin for a given rank
-  vec3 globalXmax;                                              //!< Global Xmax for a given rank
+  vec3   globalCenter;                                          //!< Center of global root cell
+  fvec3  globalXmin;                                            //!< Global Xmin for a given rank
+  fvec3  globalXmax;                                            //!< Global Xmax for a given rank
 
 private:
 //! Error optimization of Rcrit

include/treebuilder.h

 protected:
   real_t localRadius;                                           //!< Radius of local root cell
   vec3   localCenter;                                           //!< Center of local root cell
-  vec3   localXmin;                                             //!< Local Xmin for a given rank
-  vec3   localXmax;                                             //!< Local Xmax for a given rank
+  fvec3  localXmin;                                             //!< Local Xmin for a given rank
+  fvec3  localXmax;                                             //!< Local Xmax for a given rank
 
 public:
   int NCRIT;                                                    //!< Number of bodies per leaf cell
 
 // Basic type definitions
 typedef float         real_t;                                   //!< Floating point type
+typedef vec<3,float>  fvec3;                                    //!< Vector of 3 single precision types
 typedef vec<3,real_t> vec3;                                     //!< Vector of 3 floating point types
 typedef vec<4,real_t> vec4;                                     //!< Vector of 4 floating point types
 typedef vec<8,int>    ivec8;                                    //!< Vector of 8 integer types