Anonymous committed b3b0bd5

apply_p_right_trans() more cache friendly

Comments (0)

Files changed (3)

src/packedmatrix.c

 
   if(a_bit > b_bit) {
     const size_t offset = a_bit - b_bit;
+
     for (i=0; i<M->nrows; i++) {
       base = M->values + M->rowswap[i];
       a = *(base + a_word);
 
 }
 
-void mzd_col_swap_in_rows(packedmatrix *M, const size_t cola, const size_t colb, const size_t start_row, const size_t stop_row) {
-  if (cola == colb)
-    return;
-
-  const size_t _cola = cola + M->offset;
-  const size_t _colb = colb + M->offset;
-
-  const size_t a_word = _cola/RADIX;
-  const size_t b_word = _colb/RADIX;
-  const size_t a_bit = _cola%RADIX;
-  const size_t b_bit = _colb%RADIX;
-  
-  word a, b, *base;
-
-  size_t i;
-  
-  if(a_word == b_word) {
-    const word ai = RADIX - a_bit - 1;
-    const word bi = RADIX - b_bit - 1;
-    for (i=start_row; i<stop_row; i++) {
-      base = (M->values + M->rowswap[i] + a_word);
-      register word b = *base;
-      register word x = ((b >> ai) ^ (b >> bi)) & 1; // XOR temporary
-      *base = b ^ ((x << ai) | (x << bi));
-    }
-    return;
-  }
-
-  const word a_bm = (ONE<<(RADIX - (a_bit) - 1));
-  const word b_bm = (ONE<<(RADIX - (b_bit) - 1));
-
-  if(a_bit > b_bit) {
-    const size_t offset = a_bit - b_bit;
-    for (i=start_row; i<stop_row; i++) {
-      base = M->values + M->rowswap[i];
-      a = *(base + a_word);
-      b = *(base + b_word);
-
-      a ^= (b & b_bm) >> offset;
-      b ^= (a & a_bm) << offset;
-      a ^= (b & b_bm) >> offset;
-
-      *(base + a_word) = a;
-      *(base + b_word) = b;
-    }
-  } else {
-    const size_t offset = b_bit - a_bit;
-    for (i=start_row; i<stop_row; i++) {
-      base = M->values + M->rowswap[i];
-      a = *(base + a_word);
-      b = *(base + b_word);
-
-      a ^= (b & b_bm) << offset;
-      b ^= (a & a_bm) >> offset;
-      a ^= (b & b_bm) << offset;
-      *(base + a_word) = a;
-      *(base + b_word) = b;
-    }
-  }
-
-}
 
 int mzd_is_zero(packedmatrix *A) {
   /* Could be improved: stopping as the first non zero value is found (status!=0)*/

src/packedmatrix.h

  * \param stop_row Row index (exclusive).
  */
  
-void mzd_col_swap_in_rows(packedmatrix *M, const size_t cola, const size_t colb, const size_t start_row, const size_t stop_row);
+static inline void mzd_col_swap_in_rows(packedmatrix *M, const size_t cola, const size_t colb, const size_t start_row, const size_t stop_row) {
+  if (cola == colb)
+    return;
+
+  const size_t _cola = cola + M->offset;
+  const size_t _colb = colb + M->offset;
+
+  const size_t a_word = _cola/RADIX;
+  const size_t b_word = _colb/RADIX;
+  const size_t a_bit = _cola%RADIX;
+  const size_t b_bit = _colb%RADIX;
+  
+  word a, b, *base;
+
+  size_t i;
+  
+  if(a_word == b_word) {
+    const word ai = RADIX - a_bit - 1;
+    const word bi = RADIX - b_bit - 1;
+    for (i=start_row; i<stop_row; i++) {
+      base = (M->values + M->rowswap[i] + a_word);
+      register word b = *base;
+      register word x = ((b >> ai) ^ (b >> bi)) & 1; // XOR temporary
+      *base = b ^ ((x << ai) | (x << bi));
+    }
+    return;
+  }
+
+  const word a_bm = (ONE<<(RADIX - (a_bit) - 1));
+  const word b_bm = (ONE<<(RADIX - (b_bit) - 1));
+
+  if(a_bit > b_bit) {
+    const size_t offset = a_bit - b_bit;
+    for (i=start_row; i<stop_row; i++) {
+      base = M->values + M->rowswap[i];
+      a = *(base + a_word);
+      b = *(base + b_word);
+
+      a ^= (b & b_bm) >> offset;
+      b ^= (a & a_bm) << offset;
+      a ^= (b & b_bm) >> offset;
+
+      *(base + a_word) = a;
+      *(base + b_word) = b;
+    }
+  } else {
+    const size_t offset = b_bit - a_bit;
+    for (i=start_row; i<stop_row; i++) {
+      base = M->values + M->rowswap[i];
+      a = *(base + a_word);
+      b = *(base + b_word);
+
+      a ^= (b & b_bm) << offset;
+      b ^= (a & a_bm) >> offset;
+      a ^= (b & b_bm) << offset;
+      *(base + a_word) = a;
+      *(base + b_word) = b;
+    }
+  }
+
+}
 
 /**
  * \brief Read the bit at position M[row,col].

src/permutation.c

 }
 
 void mzd_apply_p_right_trans(packedmatrix *A, permutation *P) {
-/*   int i; */
-/*   const size_t step_size = MAX((CPU_L1_CACHE>>3)/A->width,1); */
-/*   for(size_t j=0; j<A->nrows; j+=step_size) { */
-/*     size_t stop_row = MIN(j+step_size, A->nrows); */
-/*     for (i=P->length-1; i>=0; i--) { */
-/*       assert(P->values[i] >= i); */
-/*       mzd_col_swap_in_rows(A, i, P->values[i], j, stop_row); */
-/*     } */
+  int i;
+  const size_t step_size = MAX((CPU_L1_CACHE>>3)/A->width,1);
+  for(size_t j=0; j<A->nrows; j+=step_size) {
+    size_t stop_row = MIN(j+step_size, A->nrows);
+    for (i=P->length-1; i>=0; i--) {
+      assert(P->values[i] >= i);
+      mzd_col_swap_in_rows(A, i, P->values[i], j, stop_row);
+    }
+  }
+/*   long i; */
+/*   if(A->nrows == 0) */
+/*     return; */
+/*   for (i=P->length-1; i>=0; i--) { */
+/*     assert(P->values[i] >= i); */
+/*     mzd_col_swap(A, i, P->values[i]); */
 /*   } */
-  long i;
-  if(A->nrows == 0)
-    return;
-  for (i=P->length-1; i>=0; i--) {
-    assert(P->values[i] >= i);
-    mzd_col_swap(A, i, P->values[i]);
-  }
 }
 
 void mzd_col_block_rotate(packedmatrix *M, size_t zs, size_t ze, size_t de, int copy) {
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.