Commits

Martin Albrecht committed 01221dc

choose better values for k if matrices are small

Comments (0)

Files changed (5)

src/brilliantrussian.c

     k = (int)log2((__M4RI_CPU_L2_CACHE/64)/(double)B->width);
     if ((__M4RI_CPU_L2_CACHE - 64*__M4RI_TWOPOW(k)*B->width) > (64*__M4RI_TWOPOW(k+1)*B->width - __M4RI_CPU_L2_CACHE))
       k++;
+
+    rci_t const klog = round(0.75 * log2_floor(MIN(MIN(a_nr,a_nc),b_nc)));
+
+    if(klog < k)
+      k = klog;
+
     if (k<2)
       k=2;
     else if(k>6)

src/echelonform.c

     mzd_set_ui(R, 0);
     mzd_free_window(R);
   }
-  
+
   mzp_free(P);
   mzp_free(Q);
 
   m4ri_codebook = NULL;
 }
 
-static int log2_floor(int v) {
-  static unsigned const int b[] = { 0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 };
-  static unsigned const int S[] = { 1, 2, 4, 8, 16 };
-  unsigned int r = 0;
-  for (int i = 4; i >= 0; --i)
-  {
-    if ((v & b[i]))
-    {
-      v >>= S[i];
-      r |= S[i];
-    } 
-  }
-  return r;
-}
 
 int m4ri_opt_k(int a, int b, int c) {
   int n = MIN(a, b);
 void m4ri_destroy_all_codes(void);
 
 /**
- * \brief Return the optimal var k for the given parameters. 
+ * \brief Return floor(log_2(v)) for a positive integer v (returns 0 for v = 0).
+ */
+
+static inline int log2_floor(int v) { /* index of the highest set bit of v; yields 0 when v is 0 or 1 */
+  static unsigned const int b[] = { 0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 }; /* b[i] masks bits S[i] .. 2*S[i]-1 */
+  static unsigned const int S[] = { 1, 2, 4, 8, 16 }; /* shift width paired with each mask */
+  unsigned int r = 0; /* result, assembled one binary digit per loop round */
+  for (int i = 4; i >= 0; --i) /* widest mask first: a binary search over the bit position */
+  {
+    if ((v & b[i])) /* v has a bit set inside mask b[i] */
+    {
+      v >>= S[i]; /* discard the low S[i] bits; NOTE(review): v is signed, so this shift is implementation-defined for negative v */
+      r |= S[i]; /* the highest set bit lies at least S[i] positions further up */
+    }
+  }
+  return r; /* unsigned accumulator converted to the int return type */
+}
+
+
+/**
+ * \brief Return the optimal var k for the given parameters.
  *
  * If var c != 0 then var k for multiplication is returned, else
- * var k for inversion. The optimal var k here means \f$0.75 log_2(n)\f$ 
+ * var k for inversion. The optimal var k here means \f$0.75 log_2(n)\f$
  * where \f$n\f$ is \f$min(a,b)\f$ for inversion and
  * \f$b\f$ for multiplication.
- * 
+ *
  * \param a Number of rows of (first) matrix
  * \param b Number of columns of (first) matrix
  * \param c Number of columns of second matrix (may be 0)

src/ple_russian.c

   if(k == 0) {
     /* __M4RI_CPU_L2_CACHE == __M4RI_PLE_NTABLES * 2^k * A->width * 8 */
     k = (int)log2((__M4RI_CPU_L2_CACHE/8)/(double)A->width/(double)__M4RI_PLE_NTABLES);
+
+    rci_t const klog = round(0.75 * log2_floor(MIN(nrows, ncols)));
+
+    if(klog < k)
+      k = klog;
+
     if (k<2)
       k=2;
     else if(k>8)
       k=8;
   }
+
   int kk = __M4RI_PLE_NTABLES * k;
   assert(kk <= m4ri_radix);