Commits

Martin Albrecht  committed 57ef83c

some performance improvements for sparse-ish matrices

  • Participants
  • Parent commits 81c73f6

Comments (0)

Files changed (3)

File src/pluq_mmpf.c

   size_t i, l, curr_pos;
   int found;
 
+  size_t bm[k];
+  size_t os[k];
+
   for(curr_pos=0; curr_pos < (size_t)k; curr_pos++) {
+    os[curr_pos] = (start_col+curr_pos)/RADIX;
+    bm[curr_pos] = ONE<<(RADIX-(start_col+curr_pos)%RADIX-1);
     found = 0;
     /* search for some pivot */
     for(i = start_row + curr_pos; i < stop_row; i++) {
-      /* clear before but preserve transformation matrix */
-      for(l = 0; l < curr_pos; l++)
-        if(done[l] < i) {
-          if(mzd_read_bit(A, i, start_col + l))
-            mzd_row_add_offset(A, i, start_row + l, start_col + l + 1);
-          done[l] = i; /* encode up to which row we added for l already */
+      const word tmp = mzd_read_bits(A, i, start_col, curr_pos+1);
+      if(tmp) {
+        word *Arow = A->rows[i];
+        /* clear before but preserve transformation matrix */
+        for (l=0; l<curr_pos; l++)
+          if(done[l] < i) {
+            if(Arow[os[l]] & bm[l])
+              mzd_row_add_offset(A, i, start_row + l, start_col + l + 1);
+            done[l] = i; /* encode up to which row we added for l already */
+          }
+        if(mzd_read_bit(A, i, start_col + curr_pos)) {
+          found = 1;
+          break;
         }
-        
-      if(mzd_read_bit(A, i, start_col + curr_pos)) {
-        found = 1;
-        break;
       }
     }
-    
     if(!found) {
       return curr_pos;
     }
         P->values[curr_row] = i;
         Q->values[curr_row] = j;
         mzd_row_swap(A, curr_row, i);
+        const size_t wrd = j/RADIX;
+        const size_t bm = ONE<<(RADIX-(j%RADIX)-1);
         for(size_t l = curr_row+1; l<nrows; l++)
-          if(mzd_read_bit(A, l, j))
+          if(A->rows[l][wrd] & bm)
             mzd_row_add_offset(A, l, curr_row, j + 1);
         curr_col = j + 1;
         curr_row++;
       kk = kk/2;
   }
 
-/*   printf("\nBefore\n"); */
-/*   mzd_print(A); */
   /* Now compressing L*/
   for (size_t j = 0; j<curr_row; ++j){
     if (Q->values[j]>j) {
-      // To be optimized by a copy_row function
-      mzd_col_swap_in_rows (A,Q->values[j], j, j, curr_row);
-      //for(size_t i=j; i<A->nrows; i++)
-      //mzd_write_bit(A,i,Q->values[j],0);
+      mzd_col_swap_in_rows(A,Q->values[j], j, j, curr_row);
     }
   }
   mzp_t *Qbar = mzp_init_window(Q,0,curr_row);
   mzd_apply_p_right_trans_even_capped(A, Qbar, curr_row, 0);
   mzp_free_window(Qbar);
-/*   printf("\nAfter"); */
-/*   mzd_print(A); */
-/*   printf("\n"); */
 
   mzd_free(U);
   mzd_free(T0);

File testsuite/Makefile

 CFLAGS=-I.. -std=c99
-LDFLAGS=-L../.libs/ -lm4ri -fopenmp
+LDFLAGS=-L../.libs/ -lm4ri 
 DEBUG=-ggdb
 
 TEST_PRGS=test_elimination test_multiplication test_trsm test_lqup test_solve test_kernel

File testsuite/test_lqup.c

       }
     }
   if (status){
-    printf("\n");
-    mzd_print(Acopy);
     printf(" ... FAILED\n");
   }  else
     printf (" ... passed\n");