Commits

Anonymous committed 14cc669

experiments with OpenMP

  • Participants
  • Parent commits 329baa7

Comments (0)

Files changed (4)

 OpenMP support for parallel multiplication and elimination is enabled
 with the
 
-  --with-openmp 
+  --enable-openmp 
 
 configure switch. If GCC is used to compile the library it is advised
 to use at least GCC 4.3 since earlier versions have problems with
 AC_CHECK_HEADER([mm_malloc.h],AC_DEFINE(HAVE_MM_MALLOC,,[Support aligned allocations]),)
 
 # OpenMP support
-AC_ARG_WITH(openmp, [  --with-openmp           add support for OpenMP Parallelism])
+AC_ARG_WITH(openmp, [  --enable-openmp           add support for OpenMP Parallelism])
 if test "x$enable_openmp" = "xyes"; then
    AX_OPENMP()
 fi
 #include <omp.h>
 #endif
 
+/**
+ * Simple blockwise product
+ */
+mzd_t *_mzd_addmul_mp_even(mzd_t *C, mzd_t *A, mzd_t *B, int cutoff);
+
 
 mzd_t *_mzd_mul_even_orig(mzd_t *C, mzd_t *A, mzd_t *B, int cutoff) {
   size_t a,b,c;
     return C;
   }
 
+#ifdef HAVE_OPENMP
+  if (omp_get_max_threads()-omp_get_num_threads() > 0)
+    mzd_set_ui(C, 0);
+    return _mzd_addmul_mp_even(C, A, B, cutoff);
+#endif
+
   /* adjust cutting numbers to work on words */
   {
     unsigned long mult = RADIX;
 
 
 #ifdef HAVE_OPENMP
-mzd_t *_mzd_mul_mp_even(mzd_t *C, mzd_t *A, mzd_t *B, int cutoff) {
+mzd_t *_mzd_addmul_mp_even(mzd_t *C, mzd_t *A, mzd_t *B, int cutoff) {
   /**
    * \todo make sure not to overwrite crap after ncols and before width*RADIX
    */
   {
 #pragma omp section
     {
-      _mzd_mul_even(C00, A00, B00, cutoff);
+      _mzd_addmul_even(C00, A00, B00, cutoff);
       _mzd_addmul_even(C00, A01, B10, cutoff);
     }
 #pragma omp section 
     {
-      _mzd_mul_even(C01, A00, B01, cutoff);
+      _mzd_addmul_even(C01, A00, B01, cutoff);
       _mzd_addmul_even(C01, A01, B11, cutoff);
     }
 #pragma omp section
     {
-      _mzd_mul_even(C10, A10, B00, cutoff);
+      _mzd_addmul_even(C10, A10, B00, cutoff);
       _mzd_addmul_even(C10, A11, B10, cutoff);
     }
 #pragma omp section
     {
-      _mzd_mul_even(C11, A10, B01, cutoff);
+      _mzd_addmul_even(C11, A10, B01, cutoff);
       _mzd_addmul_even(C11, A11, B11, cutoff);
     }
   }
     return C;
   }
 
-#ifdef HAVE_OPENMP
-  /* this one isn't optimal */
-  if (omp_get_max_threads() > 1) {
-    C = _mzd_mul_mp_even(C, A, B, cutoff);
-  } else {
-    C = _mzd_mul_even(C, A, B, cutoff);
-  }
-#else
   C = (A==B)?_mzd_sqr_even(C, A, cutoff):_mzd_mul_even(C, A, B, cutoff);
-#endif  
   return C;
 }
 
     return C;
   }
 
+#ifdef HAVE_OPENMP
+  if (omp_get_max_threads() - omp_get_num_threads() > 0)
+    return _mzd_addmul_mp_even(C, A, B, cutoff);
+#endif
+
   /* adjust cutting numbers to work on words */
   {
     unsigned long mult = RADIX;

testsuite/Makefile

 CFLAGS=-I.. -std=c99
-LDFLAGS=-L../.libs/ -lm4ri
+LDFLAGS=-L../.libs/ -lm4ri -fopenmp
 DEBUG=-ggdb
 
 TEST_PRGS=test_elimination test_multiplication test_trsm test_lqup test_solve test_kernel