1. CarloWood
  2. M4RI

Commits

CarloWood  committed b2eec38

Added row_offset and accessor functions for mzd_t using it.

row_offset is the distance in rows from the beginning of
block 0 to the first row. This allows to calculate the
following functions in just a few clock cycles. Note that
I had to reduce the size of offset, flags and blockrows_log
in order to keep sizeof(mzd_t) equal to 64 byte.

This patch adds the mzd_t functions:
mzd_first_row, mzd_first_row_next_block, mzd_row_to_block,
mzd_rows_in_block and mzd_row.

The canonical way to walk over all rows then becomes:

int n = 0;
rci_t row = 0;
word* ptr = mzd_first_row(M);
int count = mzd_rows_in_block(M, 0);
while(1) {
while (count--) {
assert(ptr == mzd_row(M, row) && ptr == M->rows[row++]);
ptr += M->rowstride;
}
if ((count = mzd_rows_in_block(M, ++n)) <= 0)
break;
ptr = mzd_first_row_next_block(M, n);
}
assert(M->ncols == 0 || row == M->nrows);

Also fixed a bug where mzd_flag_multiple_blocks was set even
if a windowed matrix fell completely inside a single block.

Removed one multiplication and the only division from mzd_init_window.

Code was now tested with small blocks (down to 4 kB, although
that causes two tests to fail because then the gray code tables
don't always fit in one block anymore. Using 8 kB blocks is 100%
successful however).

  • Participants
  • Parent commits 3314a04
  • Branches default

Comments (0)

Files changed (5)

File configure.ac

View file
 AC_ARG_ENABLE([debug],
 	AS_HELP_STRING([--enable-debug], [Enable assert() statements for debugging.]))
 
-if test "x$enable_debug" = x"yes"; then
-   DEBUG_FLAGS="-g"
-   AC_SUBST(DEBUG_FLAGS)
-else
-   AC_DEFINE(NDEBUG,1,[Define whether debugging is enabled])
-fi
-
 AC_ARG_ENABLE([debug-dump],
         AS_HELP_STRING([--enable-debug-dump], [Dump output at exit of every function.]))
 
 fi
 AC_SUBST(M4RI_DEBUG_MZD)
 
+if test "x$enable_debug" = x"yes"; then
+  DEBUG_FLAGS="-g"
+  AC_SUBST(DEBUG_FLAGS)
+else
+  if test "x$enable_debug_mzd" != "xyes"; then
+    AC_DEFINE(NDEBUG,1,[Define whether debugging is enabled])
+  fi
+fi
+
 # For the testsuite. Detect if PAPI is installed. See http://icl.cs.utk.edu/papi/ .
 
 if test -z "$m4ri_config_papi"; then

File src/debug_dump.c

View file
 static inline void consistency_check_row(mzd_t const *M, rci_t row)
 {
   assert(row >= 0 && row < M->nrows);
+  assert(M->rows[row] == mzd_row(M, row));
   if (mzd_is_windowed(M))
     return;
   // Check that the excess bits are zero.
   assert(((M->flags & mzd_flag_nonzero_excess) == 0) == ((M->ncols + M->offset) % m4ri_radix == 0));
   assert((M->flags & mzd_flag_windowed_zerooffset) == 0 || M->offset == 0);
   assert((M->flags & mzd_flag_windowed_zeroexcess) == 0 || ((M->ncols + M->offset) % m4ri_radix == 0));
-  assert(M->blocks != NULL || mzd_is_windowed(M));
-  assert(M->blocks == NULL || (((M->flags & mzd_flag_multiple_blocks) == 0) == (M->blocks[1].size == 0)));
+  assert((((M->flags & mzd_flag_multiple_blocks) == 0) == (mzd_row_to_block(M, M->nrows - 1) == 0)));
+  int n = 0;
+  rci_t counted = 0;
+  word* ptr = mzd_first_row(M);
+  int row_count = mzd_rows_in_block(M, 0);
+  while(1) {
+    while (row_count--) {
+      assert(ptr == M->rows[counted++]);
+      ptr += M->rowstride;
+    }
+    ++n;
+    row_count = mzd_rows_in_block(M, n);
+    if (row_count <= 0)
+      break;
+    ptr = mzd_first_row_next_block(M, n);
+  }
+  assert(M->ncols == 0 || counted == M->nrows);
   if (mzd_is_windowed(M))
     return;
   assert(M->rowstride == M->width || (M->rowstride == M->width + 1 && M->width >= mzd_paddingwidth));

File src/packedmatrix.c

View file
   A->flags = (A->high_bitmask != m4ri_ffff) ? mzd_flag_nonzero_excess : 0;
   A->offset = 0;
   A->offset_vector = 0;
+  A->row_offset = 0;
 
   A->rows = (word**)m4ri_mmc_calloc(r + 1, sizeof(word*)); // We're overcomitting here.
 
 }
 
 /*
-   Explanation of offset_vector (in words)
+   Explanation of offset_vector (in words), and row_offset.
 
    <------------------------------- row_stride (in words)--------------------->
    .---------------------------------------------------------------------------.  <-- m->blocks[0].begin   ^
-   |                                                                          /|                           |
-   |                                                        m->offset_vector_/ |                           |
-   |                                                                        /  |                           |
+   |                                 ^                                        /|                           |
+   |                    m->row_offset|                      m->offset_vector_/ |                           |
+   |                                 v                                      /  |                           |
    |  .--------------------------------------------------------------------v<--|---- m->rows[0]            |_ skipped_blocks (in blocks)
    |  |m (also a window)             ^                                     |   |                           |
    |  |                              |                                     |   |                           |
    `---------------------------------|-----------------------------------------'                           v
    .---------------------------------|----------------------------------------_.  <-- m->blocks[1].begin <-- windows.blocks[0].begin
-   |  |                          lowr|                                     |_^ |
-   |  |                              |            window->offset_vector _-^|   |
-   |  |                              v                               _-^   |   |                     
+   |  |                      ^   lowr|                                     |_^ |
+   |  |    window->row_offset|       |            window->offset_vector _-^|   |
+   |  |                      v       v                               _-^   |   |                     
    |  |  .----------------------------------------------------------v<--.  |<--|---- m->rows[lowr]
    |  |  |window                                                    |    `-|---|---- window->rows[0]
    |  |  |                                                          |      |   |
   window->flags |= ((ncols + window->offset) % m4ri_radix == 0) ? mzd_flag_windowed_zeroexcess : mzd_flag_nonzero_excess;
   window->blockrows_log = m->blockrows_log;
   //window->blockrows_mask = m->blockrows_mask;
-  int const skipped_blocks = (m->offset_vector + lowr * window->rowstride) / m->blocks[0].size;
+  wi_t const blockrows_mask = (1 << window->blockrows_log) - 1;
+  int const skipped_blocks = (m->row_offset + lowr) >> window->blockrows_log;
   assert(skipped_blocks == 0 || ((m->flags & mzd_flag_multiple_blocks)));
+  window->row_offset = (m->row_offset + lowr) & blockrows_mask;
   window->blocks = &m->blocks[skipped_blocks];
   wi_t const wrd_offset = (lowc + m->offset) / m4ri_radix;
-  window->offset_vector = (m->offset_vector + lowr * window->rowstride + wrd_offset) - skipped_blocks * m->blocks[0].size;
+  window->offset_vector = (m->offset_vector + wrd_offset) + (window->row_offset - m->row_offset) * window->rowstride;
   if(nrows)
     window->rows = (word**)m4ri_mmc_calloc(nrows + 1, sizeof(word*));
   else
   for(rci_t i = 0; i < nrows; ++i) {
     window->rows[i] = m->rows[lowr + i] + wrd_offset;
   }
-  if (window->blocks[1].size)
+  if (mzd_row_to_block(window, nrows - 1) > 0)
     window->flags |= m->flags & mzd_flag_multiple_blocks;
 
   /* offset_vector is the distance from the start of the first block to the first word of the first row. */

File src/packedmatrix.h

View file
   wi_t offset_vector;
 
   /**
+   * Number of rows to the first row counting from the start of the first block.
+   */
+
+  wi_t row_offset;
+
+  /**
+   * column offset of the first column.
+   */
+
+  uint16_t offset;
+
+  /**
    * Booleans to speed up things.
    *
    * The bits have the following meaning:
    * 4: Spans more than 1 block.
    */
 
-  int flags;
-
-  /**
-   * column offset of the first column.
-   */
-
-  int offset;
+  uint8_t flags;
 
   /**
    * blockrows_log = log2(blockrows);
    * where blockrows is the number of rows in one block, which is a power of 2.
    */
 
-  int blockrows_log;
+  uint8_t blockrows_log;
 
 #if 0	// Commented out in order to keep the size of mzd_t 64 bytes (one cache line). This could be added back if rows was ever removed.
   /**
  */
 static wi_t const mzd_paddingwidth = 3;
 
-static int const mzd_flag_nonzero_offset = 0x1;
-static int const mzd_flag_nonzero_excess = 0x2;
-static int const mzd_flag_windowed_zerooffset = 0x4;
-static int const mzd_flag_windowed_zeroexcess = 0x8;
-static int const mzd_flag_windowed_ownsblocks = 0x10;
-static int const mzd_flag_multiple_blocks = 0x20;
+static uint8_t const mzd_flag_nonzero_offset = 0x1;
+static uint8_t const mzd_flag_nonzero_excess = 0x2;
+static uint8_t const mzd_flag_windowed_zerooffset = 0x4;
+static uint8_t const mzd_flag_windowed_zeroexcess = 0x8;
+static uint8_t const mzd_flag_windowed_ownsblocks = 0x10;
+static uint8_t const mzd_flag_multiple_blocks = 0x20;
 
 /**
  * \brief Test if a matrix is windowed.
  *
+ * \param M Matrix
+ *
  * \return a non-zero value if the matrix is windowed, otherwise return zero.
  */
 static inline int mzd_is_windowed(mzd_t const *M) {
 /**
  * \brief Test if this mzd_t should free blocks.
  *
+ * \param M Matrix
+ *
  * \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.
  */
 static inline int mzd_owns_blocks(mzd_t const *M) {
 }
 
 /**
+ * \brief Get a pointer the first word.
+ *
+ * \param M Matrix
+ *
+ * \return a pointer to the first word of the first row.
+ */
+
+static inline word* mzd_first_row(mzd_t const *M) {
+  word* result = M->blocks[0].begin + M->offset_vector;
+  assert(M->nrows == 0 || result == M->rows[0]);
+  return result;
+}
+
+/**
+ * \brief Get a pointer to the first word in block n.
+ *
+ * Use mzd_first_row for block number 0.
+ *
+ * \param M Matrix
+ * \param n The block number. Must be larger than 0.
+ *
+ * \return a pointer to the first word of the first row in block n.
+ */
+static inline word* mzd_first_row_next_block(mzd_t const* M, int n) {
+  assert(n > 0);
+  return M->blocks[n].begin + M->offset_vector - M->row_offset * M->rowstride;
+}
+
+/**
+ * \brief Convert row to blocks index.
+ *
+ * \param M Matrix.
+ * \param row The row to convert.
+ *
+ * \return the block number that contains this row.
+ */
+
+static inline int mzd_row_to_block(mzd_t const* M, rci_t row) {
+  return (M->row_offset + row) >> M->blockrows_log;
+}
+
+/**
+ * \brief Total number of rows in this block.
+ *
+ * Should be called with a constant n=0, or with
+ * n > 0 when n is a variable, for optimization
+ * reasons.
+ *
+ * \param M Matrix
+ * \param n The block number.
+ *
+ * \return the total number of rows in this block.
+ */
+
+static inline wi_t mzd_rows_in_block(mzd_t const* M, int n) {
+  if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
+    if (__M4RI_UNLIKELY(n == 0)) {
+      return (1 << M->blockrows_log) - M->row_offset;
+    } else {
+      int const last_block = mzd_row_to_block(M, M->nrows - 1); 
+      if (n < last_block)
+	return (1 << M->blockrows_log);
+      return M->nrows + M->row_offset - (n << M->blockrows_log);
+    }
+  }
+  return n ? 0 : M->nrows;
+}
+
+/**
+ * \brief Get pointer to first word of row.
+ *
+ * \param M Matrix
+ * \param row The row index.
+ *
+ * \return pointer to first word of the row.
+ */
+
+static inline word* mzd_row(mzd_t const* M, rci_t row) {
+  wi_t big_vector = M->offset_vector + row * M->rowstride;
+  word* result = M->blocks[0].begin + big_vector;
+  if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
+    int const n = (M->row_offset + row) >> M->blockrows_log;
+    result = M->blocks[n].begin + big_vector - n * (M->blocks[0].size / sizeof(word));
+  }
+  assert(result == M->rows[row]);
+  return result;
+}
+
+/**
  * \brief Create a new matrix of dimension r x c.
  *
  * Use mzd_free to kill it.

File src/pls_mmpf.c

View file
 
 /* create a table of all 2^k linear combinations */
 void mzd_make_table_pls(mzd_t const *M, rci_t r, rci_t c, int k, mzd_t *T, rci_t *Le, rci_t *Lm) {
-  assert(T->blocks[1].size == 0);
+  assert(!(T->flags & mzd_flag_multiple_blocks));	// Note that this restricts the number of columns of any matrix to __M4RI_MAX_MZD_BLOCKSIZE * radix / twokay = 268 million.
   wi_t const blockoffset= c / m4ri_radix;
   int const twokay= __M4RI_TWOPOW(k);
   wi_t const wide = T->width - blockoffset;