Commits

Anonymous committed 5515d12

KAHAN and double real

Comments (0)

Files changed (5)

include/evaluator.h

 #define evaluator_h
 #include "kernel.h"
 #include "logger.h"
+#if COMMON_CILKH
+#include <common.cilkh>
+#else
 #include "thread.h"
+#endif
 #if COUNT
 #define count(N) N++
 #else
   }
   void P2P(C_iter Ci, C_iter Cj, bool mutual) const;
   void P2P(C_iter C) const;
+#if EVAL_ERROR_KAHAN
+  void P2PKahan(C_iter Ci, C_iter Cj) const;
+#endif
   void P2M(C_iter C, real_t &Rmax) const;
   void M2M(C_iter Ci, real_t &Rmax) const;
   void M2L(C_iter Ci, C_iter Cj, bool mutual) const;

include/serialfmm.h

   }
 
 //! Direct summation
-#if EVAL_ERROR_PARTIAL_ACCUMULATE
+#if EVAL_ERROR_KAHAN
+
+  void direct(Bodies &ibodies, Bodies &jbodies) {
+    Cells cells(2);                                             // Define a pair of cells to pass to P2P kernel
+    C_iter Ci = cells.begin(), Cj = cells.begin()+1;            // First cell is target, second cell is source
+    Ci->BODY = ibodies.begin();                                 // Iterator of first target body
+    Ci->NDBODY = ibodies.size();                                // Number of target bodies
+    Cj->BODY = jbodies.begin();                                 // Iterator of first source body
+    Cj->NDBODY = jbodies.size();                                // Number of source bodies
+    int prange = 0;                                             // Range of periodic images
+    for (int i=0; i<IMAGES; i++) {                              // Loop over periodic image sublevels
+      prange += int(powf(3,i));                                 //  Accumulate range of periodic images
+    }                                                           // End loop over perioidc image sublevels
+    for (int ix=-prange; ix<=prange; ix++) {                    // Loop over x periodic direction
+      for (int iy=-prange; iy<=prange; iy++) {                  //  Loop over y periodic direction
+        for (int iz=-prange; iz<=prange; iz++) {                //   Loop over z periodic direction
+          Xperiodic[0] = ix * 2 * globalRadius;                 //    Coordinate shift for x periodic direction
+          Xperiodic[1] = iy * 2 * globalRadius;                 //    Coordinate shift for y periodic direction
+          Xperiodic[2] = iz * 2 * globalRadius;                 //    Coordinate shift for z periodic direction
+          P2PKahan(Ci,Cj);                                      //    Evaluate P2P kernel
+        }                                                       //   End loop over z periodic direction
+      }                                                         //  End loop over y periodic direction
+    }                                                           // End loop over x periodic direction
+  }
+
+#elif EVAL_ERROR_PARTIAL_ACCUMULATE
   void direct(Bodies &ibodies, Bodies &jbodies) {
     Cells cells(2);                                             // Define a pair of cells to pass to P2P kernel
     C_iter Ci = cells.begin(), Cj = cells.begin()+1;            // First cell is target, second cell is source
 #include "vec.h"
 
 // Basic type definitions
+#if defined(REAL_TYPE) && REAL_TYPE == REAL_TYPE_DOUBLE
+typedef double               real_t;                            //!< Floating point type
+#else
 typedef float                real_t;                            //!< Floating point type
+#endif
 typedef std::complex<real_t> complex_t;                         //!< Complex type
 typedef vec<3,real_t>        vec3;                              //!< Vector of 3 floating point types
 typedef vec<3,float>         fvec3;                             //!< Vector of 3 single precision types
 typedef std::pair<vec3,vec3> vec3Pair;                          //!< Pair of vec3
 
 // Compile-time parameters
+#ifdef MULTIPOLE_EXPANSION_ORDER
+const int P = MULTIPOLE_EXPANSION_ORDER;                        //!< Order of expansions
+#else
 const int P = 3;                                                //!< Order of expansions
+#endif
 const float EPS2 = .0;                                          //!< Softening parameter (squared)
 #if COMkernel
 const int MTERM = P*(P+1)*(P+2)/6-3;                            //!< Number of Cartesian mutlipole terms

kernels/LaplaceCartesianCPU.cxx

   int ni = Ci->NDBODY;
   int nj = Cj->NDBODY;
   int i = 0;
-#if __AVX__
+#if __AVX__ && (!defined(REAL_TYPE) || REAL_TYPE == REAL_TYPE_FLOAT)
   for ( ; i<=ni-8; i+=8) {
     __m256 pot = _mm256_setzero_ps();
     __m256 ax = _mm256_setzero_ps();
   }
 #endif // __AVX__
 
-#if __SSE__
+#if __SSE__ && (!defined(REAL_TYPE) || REAL_TYPE == REAL_TYPE_FLOAT)
   for ( ; i<=ni-4; i+=4) {
     __m128 pot = _mm_setzero_ps();
     __m128 ax = _mm_setzero_ps();
   B_iter B = C->BODY;
   int n = C->NDBODY;
   int i = 0;
-#if __AVX__
+#if __AVX__ && (!defined(REAL_TYPE) || REAL_TYPE == REAL_TYPE_FLOAT)
   for ( ; i<=n-8; i+=8) {
     __m256 pot = _mm256_setzero_ps();
     __m256 ax = _mm256_setzero_ps();
   }
 #endif // __AVX__
 
-#if __SSE__
+#if __SSE__ && (!defined(REAL_TYPE) || REAL_TYPE == REAL_TYPE_FLOAT)
   for ( ; i<=n-4; i+=4) {
     __m128 pot = _mm_setzero_ps();
     __m128 ax = _mm_setzero_ps();
   }
 }
 
+#if EVAL_ERROR_KAHAN
+void Kernel::P2PKahan(C_iter Ci, C_iter Cj) const {
+  B_iter Bi = Ci->BODY;
+  B_iter Bj = Cj->BODY;
+  int ni = Ci->NDBODY;
+  int nj = Cj->NDBODY;
+  int i;
+  for (i=0 ; i<ni; i++) {
+    real_t pot = 0;
+    real_t pot_c = 0;
+    vec3 acc = 0;
+    vec3 acc_c = 0;
+    for (int j=0; j<nj; j++) {
+      vec3 dX = Bi[i].X - Bj[j].X - Xperiodic;
+      real_t R2 = norm(dX) + EPS2;
+      if (R2 != 0) {
+        real_t invR2 = 1.0f / R2;
+        real_t invR = Bi[i].SRC * Bj[j].SRC * sqrt(invR2);
+        dX *= invR2 * invR;
+        // pot += invR;
+	real_t pot_t = pot + (invR - pot_c);
+	pot_c = (pot_t - pot) - (invR - pot_c);
+	pot = pot_t;
+	vec3 acc_t = acc + (dX - acc_c);
+	acc_c = (acc_t - acc) - (dX - acc_c);
+        acc = acc_t;
+      }
+    }
+    Bi[i].TRG[0] += pot;
+    Bi[i].TRG[1] -= acc[0];
+    Bi[i].TRG[2] -= acc[1];
+    Bi[i].TRG[3] -= acc[2];
+  }
+}
+#endif
+
 void Kernel::P2M(C_iter C, real_t &Rmax) const {
   for (B_iter B=C->BODY; B!=C->BODY+C->NCBODY; B++) {
     vec3 dX = C->X - B->X;