Commits

Rio Yokota committed 67b9eef

Reorganizing types.h.

Comments (0)

Files changed (11)

 ### BG/P compiler
 #CXX	= mpixlcxx_r -qarch=450 -qtune=450 -O3
 ### TAU compiler
-#CXX	= tau_cxx.sh -ggdb3 -Wall -Wextra -Wshadow -Wuninitialized -O3 -ffast-math -funroll-loops -fforce-addr -fbounds-check
+#CXX	= tau_cxx.sh -ggdb3 -Wall -Wextra -Wshadow -Wuninitialized -O3 -msse4.2 -ffast-math -funroll-loops -fforce-addr -fbounds-check
 
 ### CUDA compiler
 NVCC	= nvcc --compiler-bindir=/usr/bin/g++-4.4 -Xcompiler -fopenmp --ptxas-options=-v\
 LFLAGS	+= -std=c++0x -DMTHREAD -lmyth
 
 ### PAPI flags
-#LFLAGS	+= -DPAPI -lpapi
+LFLAGS	+= -DPAPI -lpapi
 
 ### VTK flags : VTK is available from http://www.vtk.org/VTK/resources/software.html
 #CXX	+= -I$(VTK_INCLUDE_PATH)

examples/parallel.cxx

   Cells cells, jcells;
   Dataset DATA;
   ParallelFMM FMM;
-  FMM.printNow = MPIRANK == 0;
+  FMM.printNow = FMM.MPIRANK == 0;
 #if AUTO
   FMM.timeKernels();
 #endif
   numBodies = int(pow(10,(it+24)/8.0));
 #else
   {
-  numBodies = 1000000 / MPISIZE;
+  numBodies = 1000000 / FMM.MPISIZE;
 #endif
   if(FMM.printNow) std::cout << "N                    : " << numBodies << std::endl;
   bodies.resize(numBodies);
-  DATA.cube(bodies,MPIRANK);
+  DATA.initBodies(bodies,FMM.MPIRANK,FMM.MPISIZE);
   FMM.startTimer("FMM");
 
   FMM.partition(bodies);
   FMM.commCells();
   FMM.evaluate(cells,cells);
   jbodies = bodies;
-  for( int irank=1; irank<MPISIZE; irank++ ) {
-    FMM.getLET(jcells,(MPIRANK+irank)%MPISIZE);
+  for( int irank=1; irank<FMM.MPISIZE; irank++ ) {
+    FMM.getLET(jcells,(FMM.MPIRANK+irank)%FMM.MPISIZE);
 
 #if 0 // Set to 1 for debugging full LET communication : Step 2 (LET must be set to full tree)
     FMM.shiftBodies(jbodies); // This will overwrite recvBodies. (define recvBodies2 in partition.h to avoid this)
       C_iter Ci=Qi.front(); Qi.pop();
       C_iter Cj=Qj.front(); Qj.pop();
       if( Ci->ICELL != Cj->ICELL ) {
-        std::cout << MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
+        std::cout << FMM.MPIRANK << " ICELL  : " << Ci->ICELL << " " << Cj->ICELL << std::endl;
         break;
       }
       if( Ci->NCHILD != Cj->NCHILD ) {
-        std::cout << MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
+        std::cout << FMM.MPIRANK << " NCHILD : " << Ci->NCHILD << " " << Cj->NCHILD << std::endl;
         break;
       }
       if( Ci->NCLEAF != Cj->NCLEAF ) {
-        std::cout << MPIRANK << " NCLEAF : " << Ci->NCLEAF << " " << Cj->NCLEAF << std::endl;
+        std::cout << FMM.MPIRANK << " NCLEAF : " << Ci->NCLEAF << " " << Cj->NCLEAF << std::endl;
         break;
       }
       real_t sumi = 0, sumj = 0;
           sumj += Bj->X[0];
         }
       }
-      if( fabs(sumi-sumj)/fabs(sumi) > 1e-6 ) std::cout << MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
+      if( fabs(sumi-sumj)/fabs(sumi) > 1e-6 ) std::cout << FMM.MPIRANK << " " << Ci->ICELL << " " << sumi << " " << sumj << std::endl;
       assert( fabs(sumi-sumj)/fabs(sumi) < 1e-6 );
       for( int i=0; i<Ci->NCHILD; i++ ) Qi.push(icells.begin()+Ci->CHILD+i);
       for( int i=0; i<Cj->NCHILD; i++ ) Qj.push(jcells.begin()+Cj->CHILD+i);
   }
 #else
   jbodies = bodies;
-  for( int irank=0; irank!=MPISIZE; irank++ ) {
+  for( int irank=0; irank!=FMM.MPISIZE; irank++ ) {
     FMM.shiftBodies(jbodies);
     jcells.clear();
     FMM.setBounds(jbodies);
   Bodies bodies2 = bodies;
   DATA.initTarget(bodies2);
   FMM.startTimer("Direct sum");
-  for( int i=0; i!=MPISIZE; ++i ) {
+  for( int i=0; i!=FMM.MPISIZE; ++i ) {
     FMM.shiftBodies(jbodies);
     FMM.direct(bodies2,jbodies);
-    if(FMM.printNow) std::cout << "Direct loop          : " << i+1 << "/" << MPISIZE << std::endl;
+    if(FMM.printNow) std::cout << "Direct loop          : " << i+1 << "/" << FMM.MPISIZE << std::endl;
   }
   FMM.normalize(bodies2);
   FMM.stopTimer("Direct sum",FMM.printNow);
   }
   int Ncell = 0;
   vtkPlot vtk;
-  if( MPIRANK == 0 ) {
+  if( FMM.MPIRANK == 0 ) {
     vtk.setDomain(M_PI,0);
     vtk.setGroupOfPoints(jbodies,Ncell);
   }
-  for( int i=1; i!=MPISIZE; ++i ) {
+  for( int i=1; i!=FMM.MPISIZE; ++i ) {
     FMM.shiftBodies(jbodies);
-    if( MPIRANK == 0 ) {
+    if( FMM.MPIRANK == 0 ) {
       vtk.setGroupOfPoints(jbodies,Ncell);
     }
   }
-  if( MPIRANK == 0 ) {
+  if( FMM.MPIRANK == 0 ) {
     vtk.plot(Ncell);
   }
 #endif

examples/serial.cxx

   Args args[1];
   parse_cmdline_args(argc, argv, args);
   showArgs(args);
-  NCRIT = args->ncrit;
-  NSPAWN = args->nspawn;
-  IMAGES = args->images;
-  THETA = args->theta;
 
   Bodies bodies, jbodies;
   Cells cells, jcells;
   Dataset DATA;
   SerialFMM FMM;
+  FMM.NCRIT = args->ncrit;
+  FMM.NSPAWN = args->nspawn;
+  FMM.IMAGES = args->images;
+  FMM.THETA = args->theta;
   FMM.printNow = true;
 #if AUTO
   FMM.timeKernels();
     if(FMM.printNow) std::cout << std::endl
       << "N                    : " << numBodies << std::endl;
     bodies.resize(numBodies);
-    gendata(DATA, bodies, args->distribution);
+    DATA.initBodies(bodies, args->distribution);
     FMM.setBounds(bodies);
     FMM.buildTree(bodies,cells);                                // TODO : make it work without this
     FMM.resetTimer();

include/dataset.h

 private:
   long filePosition;                                            // Position of file stream
 
-public:
-  Dataset() : filePosition(0) {}                                // Constructor
-  ~Dataset() {}                                                 // Destructor
-
-  void initSource(Bodies &bodies) {                             // Initialize source values
+private:
+//! Initialize source values
+  void initSource(Bodies &bodies, int mpisize) {
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
-      B->IBODY = B-bodies.begin();                              //  Tag body with initial index
-      B->IPROC = MPIRANK;                                       //  Tag body with initial MPI rank
-//      B->SRC = (drand48() - .5) / bodies.size() / MPISIZE;      //  Initialize mass/charge
-      B->SRC = 1. / bodies.size() / MPISIZE;                    //  Initialize mass/charge
+#if 0
+      B->SRC = (drand48() - .5) / bodies.size() / mpisize;      //  Initialize mass/charge
+#else
+      B->SRC = 1. / bodies.size() / mpisize;                    //  Initialize mass/charge
+#endif
     }                                                           // End loop over bodies
   }
 
-  void initTarget(Bodies &bodies, bool IeqJ=true) {             // Initialize target values
-    srand48(0);                                                 // Set seed for random number generator
-    for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
-      B->IBODY = B-bodies.begin();                              //  Tag body with initial index
-      B->IPROC = MPIRANK;                                       //  Tag body with initial MPI rank
-      B->TRG = 0 * IeqJ;                                        //  Clear previous target values (IeqJ is dummy)
-    }                                                           // End loop over bodies
-  }
-
-  void lattice(Bodies &bodies) {                                // Uniform distribution on [-1,1]^3 lattice
-    int level = int(log(bodies.size()*MPISIZE+1.)/M_LN2/3);     // Level of tree
+//! Uniform distribution on [-1,1]^3 lattice
+  void lattice(Bodies &bodies, int mpirank, int mpisize) {
+    int level = int(log(bodies.size()*mpisize+1.)/M_LN2/3);     // Level of tree
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       int d = 0, l = 0;                                         //  Initialize dimension and level
-      int index = MPIRANK * bodies.size() + (B-bodies.begin()); //  Set index of body iterator
+      int index = mpirank * bodies.size() + (B-bodies.begin()); //  Set index of body iterator
       vec<3,int> nx = 0;                                        //  Initialize 3-D cell index
       while (index != 0) {                                      //  Deinterleave bits while index is nonzero
         nx[d] += (index & 1) * (1 << l);                        //   Add deinterleaved bit to 3-D cell index
         B->X[d] = -1 + (2 * nx[d] + 1.) / (1 << level);         //   Calculate cell center from 3-D cell index
       }                                                         //  End loop over dimensions
     }                                                           // End loop over bodies
-    initSource(bodies);                                         // Initialize source values
-    initTarget(bodies);                                         // Initialize target values
   }
 
-  void cube(Bodies &bodies, int seed=0, int numSplit=1) {       // Random distribution in [-1,1]^3 cube
+//! Random distribution in [-1,1]^3 cube
+  void cube(Bodies &bodies, int seed, int numSplit) {
     srand48(seed);                                              // Set seed for random number generator
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       if (numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit)) {// Mimic parallel dataset
         B->X[d] = drand48() * 2 * M_PI - M_PI;                  //   Initialize positions
       }                                                         //  End loop over dimension
     }                                                           // End loop over bodies
-    initSource(bodies);                                         // Initialize source values
-    initTarget(bodies);                                         // Initialize target values
   }
 
-  void sphere(Bodies &bodies, int seed=0, int numSplit=1) {     // Random distribution on r = 1 sphere
+//! Random distribution on r = 1 sphere
+  void sphere(Bodies &bodies, int seed, int numSplit) {
     srand48(seed);                                              // Set seed for random number generator
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       if (numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit)) {// Mimic parallel dataset
         B->X[d] /= r * 1.1;                                     //   Normalize positions
       }                                                         //  End loop over dimension
     }                                                           // End loop over bodies
-    initSource(bodies);                                         // Initialize source values
-    initTarget(bodies);                                         // Initialize target values
   }
 
-  void plummer(Bodies &bodies, int seed=0, int numSplit=1) {
+//! Plummer distribution in a r = M_PI/2 sphere
+  void plummer(Bodies &bodies, int seed, int numSplit) {
     srand48(seed);                                              // Set seed for random number generator
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       if (numSplit != 1 && B-bodies.begin() == int(seed*bodies.size()/numSplit)) { // Mimic parallel dataset
       real_t rscale = 0.015 * M_PI / std::sqrt(norm(B->X) * R2);//  Scaling to fit in M_PI box
       for (int d=0; d<3; d++) B->X[d] *= rscale;                //  Rescale particle coordinates
     }                                                           // End loop over bodies
-    initSource(bodies);                                         // Initialize source values
+  }
+
+public:
+//! Constructor
+  Dataset() : filePosition(0) {}
+//! Destructor
+  ~Dataset() {}
+
+//! Initialize target values
+  void initTarget(Bodies &bodies) {
+    for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
+      B->TRG = 0;                                               //  Clear target values
+    }                                                           // End loop over bodies
+  }
+
+//! Initialize dsitribution, source & target value of bodies
+  void initBodies(Bodies &bodies, const char * distribution, int mpirank=0, int mpisize=1) {
+    switch (distribution[0]) {                                  // Switch between data distribution type
+    case 'l':                                                   // Case for lattice
+      lattice(bodies,mpirank,mpisize);                          //  Uniform distribution on [-1,1]^3 lattice
+      break;                                                    // End case for lattice
+    case 'c':                                                   // Case for cube
+      cube(bodies,mpirank,mpisize);                             //  Random distribution in [-1,1]^3 cube
+      break;                                                    // End case for cube
+    case 's':                                                   // Case for sphere
+      sphere(bodies,mpirank,mpisize);                           //  Random distribution on surface of r = 1 sphere
+      break;                                                    // End case for sphere
+    case 'p':                                                   // Case plummer
+      plummer(bodies,mpirank,mpisize);                          //  Plummer distribution in a r = M_PI/2 sphere
+      break;                                                    // End case for plummer
+    default:                                                    // If none of the above
+      fprintf(stderr, "unknown data distribution %s\n", distribution);// Print error message
+    }                                                           // End switch between data distribution type
+    initSource(bodies,mpisize);                                 // Initialize source values
     initTarget(bodies);                                         // Initialize target values
   }
 
-  void readTarget(Bodies &bodies) {                             // Read target values from file
+//! Read target values from file
+  void readTarget(Bodies &bodies, int mpirank) {
     char fname[256];                                            // File name for saving direct calculation values
-    sprintf(fname,"direct%4.4d",MPIRANK);                       // Set file name
+    sprintf(fname,"direct%4.4d",mpirank);                       // Set file name
     std::ifstream file(fname,std::ios::in | std::ios::binary);  // Open file
     file.seekg(filePosition);                                   // Set position in file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
     file.close();                                               // Close file
   }
 
-  void writeTarget(Bodies &bodies) {                            // Write target values to file
+//! Write target values to file
+  void writeTarget(Bodies &bodies, int mpirank) {
     char fname[256];                                            // File name for saving direct calculation values
-    sprintf(fname,"direct%4.4d",MPIRANK);                       // Set file name
+    sprintf(fname,"direct%4.4d",mpirank);                       // Set file name
     std::ofstream file(fname,std::ios::out | std::ios::app | std::ios::binary);// Open file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       file << B->TRG[0] << std::endl;                           //  Write data for potential
     file.close();                                               // Close file
   }
 
-  void evalError(Bodies &bodies, Bodies &bodies2,               // Evaluate error
+//! Evaluate relaitve L2 norm error
+  void evalError(Bodies &bodies, Bodies &bodies2,
                  real_t &diff1, real_t &norm1, real_t &diff2, real_t &norm2) {
     B_iter B2 = bodies2.begin();                                // Set iterator for bodies2
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++, B2++) { // Loop over bodies & bodies2
     }                                                           //  End loop over bodies & bodies2
   }
 
-  void printError(real_t diff1, real_t norm1, real_t diff2, real_t norm2) {// Print relative L2 norm error
+//! Print relative L2 norm error
+  void printError(real_t diff1, real_t norm1, real_t diff2, real_t norm2) {
     std::cout << std::setw(20) << std::left
               << "Rel. L2 Error (pot)" << " : " << std::sqrt(diff1/norm1) << std::endl;
     std::cout << std::setw(20) << std::left
 #include <cmath>
 #include "sort.h"
 
-class Kernel : public Sort {
+class Kernel : public Sort, public Parameters {
 protected:
   C_iter Ci0;
   C_iter Cj0;
 #if PAPI
     long long values[3] = {0,0,0};                              // Values for each event
     PAPI_stop(PAPIEVENT,values);                                // PAPI stop
+    std::cout << std::setw(stringLength) << std::left           //  Set format
+              << "L2 Miss" << " : " << values[0] << std::endl;  //  Print PAPI event and count
     std::cout << "L2 Miss: " << values[0]                       // Print L2 Misses
               << " L2 Access: " << values[1]                    // Print L2 Access
               << " TLB Miss: " << values[2] << std::endl;       // Print TLB Misses
   }
 
 //! Write traces of all events
-  inline void writeTrace() {
+  inline void writeTrace(int mpirank) {
     char fname[256];                                            // File name
-    sprintf(fname,"trace%4.4d.svg",MPIRANK);                    // Create file name for trace
+    sprintf(fname,"trace%4.4d.svg",mpirank);                    // Create file name for trace
     std::ofstream traceFile(fname);                             // Open trace log file
     traceFile << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" // Header statements for trace log file
         << "<!DOCTYPE svg PUBLIC \"-_W3C_DTD SVG 1.0_EN\" \"http://www.w3.org/TR/SVG/DTD/svg10.dtd\">\n"
   const int WAIT;                                               //!< Waiting time between output of different ranks
 
 public:
+  int MPIRANK;                                                  //!< Rank of MPI communicator
+  int MPISIZE:                                                  //!< Size of MPI communicator
+
+public:
 //! Constructor, initialize WAIT time
   MyMPI() : EXTERNAL(0), WAIT(100) {                            // Constructor, initialize WAIT time
     int argc(0);                                                // Dummy argument count

include/options.h

   return args;
 }
 
-static int gendata(Dataset& data, Bodies& bodies, const char * distribution) {
-  switch (distribution[0]) {
-  case 'l':
-    data.lattice(bodies);
-    return 1;
-  case 'c':
-    data.cube(bodies);
-    return 1;
-  case 's':
-    data.sphere(bodies);
-    return 1;
-  case 'p':
-    data.plummer(bodies);
-    return 1;                        
-  default:
-    fprintf(stderr, "unknown data distribution %s\n", distribution);
-    return 0;                        
-  }
-}
-
 #endif

include/parameters.h

+#ifndef parameters_h
+#define parameters_h
+
+// Compile-time parameters
+static const int P = 3;                                         //!< Order of expansions
+
+// Runtime parameters
+struct Parameters {
+  int NCRIT;                                                    //!< Number of bodies per leaf cell
+  int NSPAWN;                                                   //!< Threshold of NDLEAF for spawning new threads
+  int IMAGES;                                                   //!< Number of periodic image sublevels
+  float THETA;                                                  //!< Multipole acceptance criteria
+  float EPS2;                                                   //!< Softening parameter (squared)
+  Parameters() : NCRIT(10), NSPAWN(1000), IMAGES(0), THETA(.6), EPS2(.0) {}
+};
+
+#endif

include/treebuilder.h

     C->NDLEAF = octNode->NLEAF;                                 // Number of decendant leafs
     C->LEAF   = B0 + octNode->LEAF;                             // Iterator of first body in cell
     if (octNode->NNODE == 1) {                                  // If node has no children
-      C->CHILD = 0;                                             //  Set index of first child cell to zero
+      C->CHILD  = 0;                                            //  Set index of first child cell to zero
       C->NCHILD = 0;                                            //  Number of child cells
       C->NCLEAF = octNode->NLEAF;                               //  Number of bodies in cell
       assert(C->NCLEAF > 0);
 #endif
 
 #include <map>
+#include "parameters.h"
 #include <pthread.h>
 #include <queue>
 #include <string>
 typedef vec<8,int>    ivec8;                                    //!< Vector of 8 integer types
 typedef std::pair<vec3,vec3> vec3Pair;                          //!< Pair of vec3
 
-#ifndef KERNEL
-int MPIRANK    = 0;                                             //!< MPI comm rank
-int MPISIZE    = 1;                                             //!< MPI comm size
-int NCRIT      = 10;                                            //!< Number of bodies per leaf cell
-int NSPAWN     = 1000;                                          //!< Threshold of NDLEAF for spawning new threads
-int IMAGES     = 0;                                             //!< Number of periodic image sublevels
-real_t THETA   = .5;                                            //!< Multipole acceptance criteria
-real_t EPS2    = .0;                                            //!< Softening parameter (squared)
+//#ifndef KERNEL
+namespace {
 vec3 Xperiodic = .0;                                            //!< Coordinate offset of periodic image
 #if PAPI
 int PAPIEVENT  = PAPI_NULL;                                     //!< PAPI event handle
 #endif
-#else
-extern int MPIRANK;                                             //!< MPI comm rank
-extern int MPISIZE;                                             //!< MPI comm size
-extern int NCRIT;                                               //!< Number of bodies per leaf cell
-extern int NSPAWN;                                              //!< Threshold of NDLEAF for spawning new threads
-extern int IMAGES;                                              //!< Number of periodic image sublevels
-extern real_t THETA;                                            //!< Multipole acceptance criteria
-extern real_t EPS2;                                             //!< Softening parameter (squared)
-extern vec3 Xperiodic;                                          //!< Coordinate offset of periodic image
-#if PAPI
-extern int PAPIEVENT;                                           //!< PAPI event handle
-#endif
-#endif
-
-const int    P      = 3;                                        //!< Order of expansions
-const real_t EPS    = 1e-6;                                     //!< Single precision epsilon
+}
 
 #if COMkernel
 const int MTERM = P*(P+1)*(P+2)/6-3;                            //!< Number of Cartesian mutlipole terms