Commits

Rio Yokota committed 9177921

Modified parameters for QUARK runs.

Comments (0)

Files changed (7)

 EXPAND  = Spherical
 
 ### GCC compiler
-CXX	= mpicxx -ggdb3 -Wall -Wextra -O3 -fopenmp -ffast-math -funroll-loops -fforce-addr -fPIC -I../include
+#CXX	= mpicxx -ggdb3 -Wall -Wextra -O3 -fopenmp -ffast-math -funroll-loops -fforce-addr -fPIC -I../include
+CXX	= mpicxx -ggdb3 -Wall -Wextra -O3 -ffast-math -funroll-loops -fforce-addr -fPIC -I../include
 ### Intel compiler
 #CXX	= mpicxx -Wall -xHOST -O3 -openmp -funroll-loops -finline-functions -fPIC -ansi-alias -I../include
 
 #LFLAGS += -DPAPI -lpapi
 
 ### QUARK flags
-#LFLAGS	+= -DQUARK -lquark
+LFLAGS	+= -DQUARK -lquark
 
 ### MassiveThreads flags
 #LFLAGS += -std=c++0x -DMTHREADS -lmyth -lpthread -ldl
 
 ### VTK flags
-CXX     += -I$(VTK_INCLUDE_PATH)
-VFLAGS  = -L$(VTK_LIBRARY_PATH) -lvtkRendering -lvtkGraphics -lvtkFiltering -lvtkViews -lvtkCommon -lvtkWidgets -lvtkIO -DVTK
+#CXX     += -I$(VTK_INCLUDE_PATH)
+#VFLAGS  = -L$(VTK_LIBRARY_PATH) -lvtkRendering -lvtkGraphics -lvtkFiltering -lvtkViews -lvtkCommon -lvtkWidgets -lvtkIO -DVTK
 
 ifeq ($(DEVICE),GPU)
 ### CUDA flags

include/evaluator.h

   void traverseQueue(Pair pair) {
     PairQueue pairQueue;                                        // Queue of interacting cell pairs
 #if QUARK
-    Quark *quark = QUARK_New(4);                                // Initialize QUARK object
-    C_iter root = pair.first;                                   // Iterator for root target cell
+    Quark *quark = QUARK_New(1);                                // Initialize QUARK object
+//    C_iter root = pair.first;                                   // Iterator for root target cell
 #endif
     pairQueue.push_back(pair);                                  // Push pair to queue
     while( !pairQueue.empty() ) {                               // While dual traversal queue is not empty
         }                                                       //   End loop over second cell's children
       }                                                         //  End if for which cell to split
 #if QUARK
-      if( int(pairQueue.size()) > root->NDLEAF / 100 ) {        //  When queue size reaches threshold
+      if( int(pairQueue.size()) > 100 ) {                       //  When queue size reaches threshold
         while( !pairQueue.empty() ) {                           //   While dual traversal queue is not empty
           pair = pairQueue.front();                             //    Get interaction pair from front of queue
           pairQueue.pop_front();                                //    Pop dual traversal queue
 #endif
 
 typedef unsigned           bigint;                              //!< Big integer type
-typedef double             real;                                //!< Real number type on CPU
+typedef float              real;                                //!< Real number type on CPU
 typedef float              gpureal;                             //!< Real number type on GPU
 typedef std::complex<real> complex;                             //!< Complex number type
 typedef vec<3,real>        vect;                                //!< 3-D vector type
 #endif
 #endif
 
-const int  P        = 25;                                       //!< Order of expansions
+const int  P        = 6;                                        //!< Order of expansions
 const int  NCRIT    = 64;                                       //!< Number of bodies per cell
 const int  MAXBODY  = 50000;                                    //!< Maximum number of bodies per GPU kernel
 const int  MAXCELL  = 10000000;                                 //!< Maximum number of bodies/coefs in cell per GPU kernel
 const real R2MAX    = 100.0;                                    //!< Maximum value for L-J R^2
 const int  GPUS     = 3;                                        //!< Number of GPUs per node
 const int  THREADS  = 128;                                      //!< Number of threads per thread-block
-const int  PTHREADS = 4;                                        //!< Number of pthreads in quark
 
 const int MTERM = P*(P+1)*(P+2)/6;                              //!< Number of Cartesian multipole terms
 const int LTERM = (P+1)*(P+2)*(P+3)/6;                          //!< Number of Cartesian local terms

kernel/CPUEvaluator.cxx

 void Evaluator<equation>::evalEwaldReal(Cells &cells) {         // Evaluate queued Ewald real kernels
   startTimer("evalEwaldReal");                                  // Start timer
   Ci0 = cells.begin();                                          // Set begin iterator
-#pragma omp parallel for
+//#pragma omp parallel for
   for( int i=0; i<int(cells.size()); ++i ) {                    // Loop over cells
     C_iter Ci = Ci0 + i;                                        //  Target cell iterator
     while( !listP2P[i].empty() ) {                              //  While M2P interaction list is not empty

kernel/CPUEwaldLaplace.cxx

 namespace {
 void dft(Ewalds &ewalds, Bodies &bodies, real R0) {
   real scale = M_PI / R0;
-#pragma omp parallel for
+//#pragma omp parallel for
   for( int i=0; i<int(ewalds.size()); ++i ) {
     E_iter E = ewalds.begin() + i;
     E->REAL = E->IMAG = 0;
 
 void idft(Ewalds &ewalds, Bodies &bodies, real R0) {
   real scale = M_PI / R0;
-#pragma omp parallel for
+//#pragma omp parallel for
   for( int i=0; i<int(bodies.size()); ++i ) {
     B_iter B = bodies.begin() + i;
     vec<4,real> TRG = 0;

kernel/CPUP2P.cxx

 template<>
 void Kernel<Laplace>::P2P(C_iter Ci, C_iter Cj) const {         // Laplace P2P kernel on CPU
 #ifndef SPARC_SIMD
-#pragma omp parallel for
+//#pragma omp parallel for
   for( int i=0; i<Ci->NDLEAF; ++i ) {                           // Loop over target bodies
     B_iter Bi = Ci->LEAF+i;                                     //  Target body iterator
     real P0 = 0;                                                //  Initialize potential

unit_test/serialrun.cxx

 #endif
 
 int main() {
-  const int numBodies = 10000;                                  // Number of bodies
+  const int numBodies = 100000;                                 // Number of bodies
   const int numTarget = 100;                                    // Number of target points to be used for error eval
   IMAGES = 1;                                                   // Level of periodic image tree (0 for non-periodic)
   THETA = 1 / sqrtf(4);                                         // Multipole acceptance criteria