Commits

Anonymous committed 3d5627c

improved cache friendliness of column swaps in LQUP

Comments (0)

Files changed (2)

     /* undo permutation */
     mzd_apply_p_right_trans(A11, Q2);
 
+
+    permutation *tmp = mzp_init(A->ncols);
     for(i=0, j=n1; j<n1+r2; i++, j++) {
-      mzd_col_swap(A, r1 + i, n1 + Q2->values[i]);
+      //mzd_col_swap(A, r1 + i, n1 + Q2->values[i]);
+      tmp->values[r1+i] = Q2->values[i] + n1;
       Q->values[r1+i] = Q2->values[i] + n1;
     }
     for(i=r1+r2; i<ncols; i++) {
       Q->values[i] = i;
     }
+    mzd_apply_p_right(A, tmp);
+    mzp_free(tmp);
 
     mzp_free_window(Q2);
     mzp_free_window(P2);

src/permutation.c

   size_t i;
   if(A->nrows == 0)
     return;
-  for (i=0; i<P->length; i++) {
-    assert(P->values[i] >= i);
-    mzd_col_swap(A, i, P->values[i]);
+  const size_t step_size = MAX((CPU_L1_CACHE>>3)/A->width,1);
+  for(size_t j=0; j<A->nrows; j+=step_size) {
+    size_t stop_row = MIN(j+step_size, A->nrows);
+    for (i=0; i<P->length; ++i) {
+      assert(P->values[i] >= i);
+      mzd_col_swap_in_rows(A, i, P->values[i], j, stop_row);
+    }
   }
+/*   for (i=0; i<P->length; i++) { */
+/*     assert(P->values[i] >= i); */
+/*     mzd_col_swap(A, i, P->values[i]); */
+/*   } */
 }
 
 void mzd_apply_p_right_trans(packedmatrix *A, permutation *P) {
   int i;
+  if(A->nrows == 0)
+    return;
   const size_t step_size = MAX((CPU_L1_CACHE>>3)/A->width,1);
   for(size_t j=0; j<A->nrows; j+=step_size) {
     size_t stop_row = MIN(j+step_size, A->nrows);
-    for (i=P->length-1; i>=0; i--) {
+    for (i=P->length-1; i>=0; --i) {
       assert(P->values[i] >= i);
       mzd_col_swap_in_rows(A, i, P->values[i], j, stop_row);
     }
   }
 /*   long i; */
-/*   if(A->nrows == 0) */
-/*     return; */
 /*   for (i=P->length-1; i>=0; i--) { */
 /*     assert(P->values[i] >= i); */
 /*     mzd_col_swap(A, i, P->values[i]); */