Commits

Anonymous committed 24f8758

Added low-level parallelisation to process_rows2_pluq and made the parallel sections in mzd_mul_mp_even() use num_threads(4)

Comments (0)

Files changed (2)

   }
 
   wide -= 2;
+#ifdef HAVE_OPENMP
+#pragma omp parallel for private(r) shared(startrow, stoprow) schedule(dynamic,32) if(stoprow-startrow > 128)
+#endif
   for(r=startrow; r<stoprow; r++) {
     const int x0 = L0[ (int)mzd_read_bits(M, r, startcol, ka) ];
     word *t0 = T0->rows[x0] + blocknuma;
   }
 
 #ifdef HAVE_OPENMP
-  if (omp_get_max_threads()-omp_get_num_threads() > 0) {
+  if (omp_get_num_threads() <= omp_get_max_threads() - 4) {
     mzd_set_ui(C, 0);
     return _mzd_addmul_mp_even(C, A, B, cutoff);
   }
   mzd_t *C10 = mzd_init_window(C, anr,   0, 2*anr,   bnc);
   mzd_t *C11 = mzd_init_window(C, anr, bnc, 2*anr, 2*bnc);
   
-#pragma omp parallel sections
+#pragma omp parallel sections num_threads(4)
   {
 #pragma omp section
     {
   }
 
 #ifdef HAVE_OPENMP
-  if (omp_get_max_threads() - omp_get_num_threads() > 0)
+  if (omp_get_num_threads() <= omp_get_max_threads()-4) {
     return _mzd_addmul_mp_even(C, A, B, cutoff);
+  }
 #endif
 
   /* adjust cutting numbers to work on words */