Commits

Rio Yokota committed 95670a1

Now runs on BG/Q.

Comments (0)

Files changed (8)

 CXX	= mpicxx -ggdb3 -Wall -Wextra -O3 -msse4a -ffast-math -funroll-loops # GCC
 #CXX	= mpicxx -Wall -xHOST -O3 -funroll-loops -finline-functions -ansi-alias # Intel
 #CXX	= icpc -Wall -mmic -O3 -L/opt/intel/tbb/lib/mic -lpthread # Intel MIC
+#CXX	= mpic++ -Wall -mmic -O3 -L/opt/apps/intel/13/composer_xe_2013.2.146/tbb/lib/mic # Stampede
+#CXX	= mpixlcxx_r -qarch=qp -qtune=qp -O3 -qhot -I$(HOME)/tbb/include # BG/Q
 #CXX	= mpixlcxx_r -qarch=450 -qtune=450 -O3 # BG/P
 #CXX	= tau_cxx.sh # TAU compiler instrumentation
 
 
 template <typename T, size_t NALIGN>
 struct AlignedAllocator : public std::allocator<T> {
-  using typename std::allocator<T>::size_type;
-  using typename std::allocator<T>::pointer;
-
   template <typename U>
   struct rebind {
     typedef AlignedAllocator<U, NALIGN> other;
   };
 
-  pointer allocate(size_type n) {
+  T * allocate(size_t n) {
     void *ptr = NULL;
     int rc = posix_memalign(&ptr, NALIGN, n * sizeof(T));
     if (rc != 0) return NULL;
     if (ptr == NULL) throw std::bad_alloc();
-    return reinterpret_cast<pointer>(ptr);
+    return reinterpret_cast<T*>(ptr);
   }
 
-  void deallocate(pointer p, size_type) {
+  void deallocate(T * p, size_t) {
     return free(p);
   }
 };
       fprintf(stderr, "invalid distribution %s\n", arg);
       exit(0);
     }
+    return "";
   }
 
  public:

include/dataset.h

     std::stringstream name;                                     // File name
     name << "direct" << std::setfill('0') << std::setw(4)       // Set format
          << mpirank << ".dat";                                  // Create file name for saving direct calculation values
-    std::ifstream file(name.str(),std::ios::in | std::ios::binary);// Open file
+    std::ifstream file(name.str().c_str(),std::ios::in | std::ios::binary);// Open file
     file.seekg(filePosition);                                   // Set position in file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       file >> B->TRG[0];                                        //  Read data for potential
     std::stringstream name;                                     // File name
     name << "direct" << std::setfill('0') << std::setw(4)       // Set format
          << mpirank << ".dat";                                  // Create file name for saving direct calculation values
-    std::ofstream file(name.str(),std::ios::out | std::ios::app | std::ios::binary);// Open file
+    std::ofstream file(name.str().c_str(),std::ios::out | std::ios::app | std::ios::binary);// Open file
     for (B_iter B=bodies.begin(); B!=bodies.end(); B++) {       // Loop over bodies
       file << B->TRG[0] << std::endl;                           //  Write data for potential
       file << B->TRG[1] << std::endl;                           //  Write data for x acceleration
   pthread_t thread;                                             //!< pthread id
   double    begin;                                              //!< Begin timer of trace
   double    end;                                                //!< End timer of trace
+  Trace() {}                                                    //!< Constructor
 };
 
 //! Timer and Trace logger
 // Detect SIMD Byte length of architecture
 #if __MIC__
 const int SIMD_BYTES = 64;                                      //!< SIMD byte length of MIC
-#elif __AVX__
+#elif __AVX__ | __bgq__
 const int SIMD_BYTES = 32;                                      //!< SIMD byte length of AVX
 #elif __SSE__
 const int SIMD_BYTES = 16;                                      //!< SIMD byte length of SSE
 #endif
 
+// Bluegene/Q and K computer don't have single precision arithmetic
+#if __bgq__ | __SPARC__
+#ifndef FP64
+#error Please use FP64 for BG/Q and K computer
 #endif
+#endif
+
+#endif
   kvec4  TRG;                                                   //!< Scalar+vector3 target values
 };
 typedef AlignedAllocator<Body,SIMD_BYTES> BodyAllocator;        //!< Body alignment allocator
-typedef std::vector<Body,BodyAllocator>   Bodies;               //!< Vector of bodies
+//typedef std::vector<Body,BodyAllocator>   Bodies;               //!< Vector of bodies
+typedef std::vector<Body>                 Bodies;               //!< Vector of bodies
 typedef Bodies::iterator                  B_iter;               //!< Iterator of body vector
 
 //! Structure of cells
     return *this;
   }
   const vec &operator&=(const vec &v) {                         // Vector compound assignment (bitwise and)
-    for (int i=0; i<N; i++) data[i] &= v[i];
+    for (int i=0; i<N; i++) {
+      int temp = int(data[i]) & int(v[i]);
+      data[i] = temp;
+    }
     return *this;
   }
   const vec &operator|=(const vec &v) {                         // Vector compound assignment (bitwise or)
-    for (int i=0; i<N; i++) data[i] |= v[i];
+    for (int i=0; i<N; i++) {
+      int temp = int(data[i]) | int(v[i]);
+      data[i] = temp;
+    }
     return *this;
   }
   vec operator+(const T v) const {                              // Scalar arithmetic (add)
     for (int i=0; i<N; i++) temp[i] = v[i] > w[i] ? v[i] : w[i];
     return temp;
   }
+  friend vec rsqrt(const vec &v) {                              // Reciprocal square root
+    vec temp;
+    for (int i=0; i<N; i++) temp[i] = 1. / std::sqrt(v[i]);
+    return temp;
+  }
 };
 
 #if __MIC__
 };
 #endif
 
+#if __bgq__
+template<>
+class vec<4,double> {
+ private:
+  vector4double data;
+ public:
+  vec(){}                                                       // Default constructor
+  vec(const double v) {                                         // Copy constructor scalar
+    vector4double temp = {v};
+    data = temp;
+  }
+  vec(const vector4double v) {                                  // Copy constructor SIMD register
+    data = v;
+  }
+  vec(const vec &v) {                                           // Copy constructor vector
+    data = v.data;
+  }
+  vec(const double a, const double b, const double c, const double d) {// Copy constructor (component-wise)
+    vector4double temp = {a,b,c,d};
+    data = temp;
+  }
+  ~vec(){}                                                      // Destructor
+  const vec &operator=(double v) {                              // Scalar assignment
+    vector4double temp = {v};
+    data = temp;
+    return *this;
+  }
+  const vec &operator=(const vec &v) {                          // Vector assignment
+    data = v.data;
+    return *this;
+  }
+  const vec &operator+=(const vec &v) {                         // Vector compound assignment (add)
+    data = vec_add(data,v.data);
+    return *this;
+  }
+  const vec &operator-=(const vec &v) {                         // Vector compound assignment (subtract)
+    data = vec_sub(data,v.data);
+    return *this;
+  }
+  const vec &operator*=(const vec &v) {                         // Vector compound assignment (multiply)
+    data = vec_mul(data,v.data);
+    return *this;
+  }
+  const vec &operator/=(const vec &v) {                         // Vector compound assignment (divide)
+    data = vec_swdiv_nochk(data,v.data);
+    return *this;
+  }
+  const vec &operator&=(const vec &v) {                         // Vector compound assignment (bitwise and)
+    data = vec_and(data,v.data);
+    return *this;
+  }
+  vec operator+(const vec &v) const {                           // Vector arithmetic (add)
+    return vec(vec_add(data,v.data));
+  }
+  vec operator-(const vec &v) const {                           // Vector arithmetic (subtract)
+    return vec(vec_sub(data,v.data));
+  }
+  vec operator*(const vec &v) const {                           // Vector arithmetic (multiply)
+    return vec(vec_mul(data,v.data));
+  }
+  vec operator/(const vec &v) const {                           // Vector arithmetic (divide)
+    return vec(vec_swdiv_nochk(data,v.data));
+  }
+  vec operator>(const vec &v) const {                           // Vector arithmetic (greater than)
+    return vec(vec_cmpgt(data,v.data));
+  }
+  vec operator<(const vec &v) const {                           // Vector arithmetic (less than)
+    return vec(vec_cmplt(data,v.data));
+  }
+  vec operator-() const {                                       // Vector arithmetic (negation)
+    return vec(vec_sub((vector4double)(0),data));
+  }
+  double &operator[](int i) {                                   // Indexing (lvalue)
+    return ((double*)&data)[i];
+  }
+  const double &operator[](int i) const {                       // Indexing (rvalue)
+    return ((double*)&data)[i];
+  }
+  friend std::ostream &operator<<(std::ostream &s, const vec &v) {// Component-wise output stream
+    for (int i=0; i<4; i++) s << vec_extract(v.data,i) << ' ';
+    return s;
+  }
+  friend double sum(const vec &v) {                             // Sum vector
+    double temp = 0;
+    for (int i=0; i<4; i++) temp += v[i];
+    return temp;
+  }
+  friend double norm(const vec &v) {                            // L2 norm squared
+    double temp = 0;
+    for (int i=0; i<4; i++) temp += v[i] * v[i];
+    return temp;
+  }
+  friend vec min(const vec &v, const vec &w) {                  // Element-wise minimum
+    vec temp;
+    for (int i=0; i<4; i++) temp[i] = v[i] < w[i] ? v[i] : w[i];
+    return temp;
+  }
+  friend vec max(const vec &v, const vec &w) {                  // Element-wise maximum
+    vec temp;
+    for (int i=0; i<4; i++) temp[i] = v[i] > w[i] ? v[i] : w[i];
+    return temp;
+  }
+  friend vec rsqrt(const vec &v) {                              // Reciprocal square root
+    return vec(vec_rsqrtes(v.data));
+  }
+};
+#endif
+
 #if __SSE3__
 #include <pmmintrin.h>