# Commits

committed 3314a04

After this patch, also for windowed matrices, you can find the first
word of the first row with M->blocks[0].begin + M->offset_vector.

Subsequently you can find the next rows by adding rowstride until
the resulting pointer >= M->blocks[n].end. Then increment n and
set the pointer to the first word in the next block with
M->blocks[n].begin + (M->offset_vector % M->rowstride).

For example, to run over all rows of an arbitrary matrix:

rci_t r = 0; // Only used for assertion that we checked all rows.
int n = 0; // Current block number.
rci_t counted = 0; // Number of rows processed already.
word* row = M->blocks[n].begin + M->offset_vector;

// Run over all blocks.
while(1) {
// Number of rows till the end of the current block.
int row_count = (M->blocks[n].end + M->rowstride - 1 - row) / M->rowstride;
// Don't go passed the end of the matrix.
row_count = MIN(row_count, M->nrows - counted);
// Keep track of how many we already processed.
counted += row_count;

// START INNER LOOP
while (row_count--) {
assert(row == M->rows[r++]); // Assertion to check it works and count the total number of rows.
row += M->rowstride;
}
// END INNER LOOP

break;

// Position row at first word of first row in the next block.
row = M->blocks[n].begin + (M->offset_vector % M->rowstride);
}

// Make sure we checked all rows.
assert(M->ncols == 0 || r == M->nrows);

# src/debug_dump.c

`   assert(M->width * m4ri_radix >= M->ncols + M->offset);`
`   assert((M->width - 1) * m4ri_radix < M->ncols + M->offset);`
`   assert(M->width < mzd_paddingwidth || (M->rowstride & 1) == 0);`
`-  assert((M->blockrows_mask + 1) == (1 << M->blockrows_log));`
`+  //assert((M->blockrows_mask + 1) == (1 << M->blockrows_log));`
`   assert((1 << M->blockrows_log) * M->rowstride <= __M4RI_MAX_MZD_BLOCKSIZE);`
`   assert((1 << M->blockrows_log) * M->rowstride > __M4RI_MAX_MZD_BLOCKSIZE / 2);`
`   assert((M->width > 1 && M->low_bitmask == __M4RI_RIGHT_BITMASK(m4ri_radix - M->offset)) ||`

# src/packedmatrix.c

`   }`
`   A->flags = (A->high_bitmask != m4ri_ffff) ? mzd_flag_nonzero_excess : 0;`
`   A->offset = 0;`
`+  A->offset_vector = 0;`
` `
`   A->rows = (word**)m4ri_mmc_calloc(r + 1, sizeof(word*)); // We're overcomitting here.`
` `
`     while(blockrows >>= 1)`
`       A->blockrows_log++;`
`     blockrows = 1 << A->blockrows_log;`
`-    A->blockrows_mask = blockrows - 1;`
`+    //A->blockrows_mask = blockrows - 1;`
`+    int const blockrows_mask = blockrows - 1;`
`     int const nblocks = (r + blockrows - 1) / blockrows;`
`     A->flags |= (nblocks > 1) ? mzd_flag_multiple_blocks : 0;`
`     A->blocks = (mzd_block_t*)m4ri_mmc_calloc(nblocks + 1, sizeof(mzd_block_t));`
`     }`
` `
`     for(rci_t i = 0; i < A->nrows; ++i) {`
`-      A->rows[i] = A->blocks[i >> A->blockrows_log].begin + (i & A->blockrows_mask) * A->rowstride;`
`+      A->rows[i] = A->blocks[i >> A->blockrows_log].begin + (i & blockrows_mask) * A->rowstride;`
` #ifdef M4RI_WRAPWORD`
`       word::init_array(A->rows[i], A->width);`
` #endif`
`   return A;`
` }`
` `
`+/*`
`+   Explanation of offset_vector (in words)`
`+`
`+   <------------------------------- row_stride (in words)--------------------->`
`+   .---------------------------------------------------------------------------.  <-- m->blocks[0].begin   ^`
`+   |                                                                          /|                           |`
`+   |                                                        m->offset_vector_/ |                           |`
`+   |                                                                        /  |                           |`
`+   |  .--------------------------------------------------------------------v<--|---- m->rows[0]            |_ skipped_blocks (in blocks)`
`+   |  |m (also a window)             ^                                     |   |                           |`
`+   |  |                              |                                     |   |                           |`
`+   `---------------------------------|-----------------------------------------'                           v`
`+   .---------------------------------|----------------------------------------_.  <-- m->blocks[1].begin <-- windows.blocks[0].begin`
`+   |  |                          lowr|                                     |_^ |`
`+   |  |                              |            window->offset_vector _-^|   |`
`+   |  |                              v                               _-^   |   |                     `
`+   |  |  .----------------------------------------------------------v<--.  |<--|---- m->rows[lowr]`
`+   |  |  |window                                                    |    `-|---|---- window->rows[0]`
`+   |  |  |                                                          |      |   |`
`+   `---------------------------------------------------------------------------'`
`+   .---------------------------------------------------------------------------.  <-- m->blocks[2].begin <-- windows.blocks[1].begin`
`+   |  |  |                                                          |      |   |`
`+   |  |  |                                                          | lowc |   |`
`+   |  |  |                                                          |<---->|   |`
`+   |  |  |                                                          |   \__|___|__ also wrd_offset (in words)`
`+   |  |  `----------------------------------------------------------'      |   |`
`+   |  `--------------------------------------------------------------------'   |`
`+   `---------------------------------------------------------------------------'`
`+   .---------------------------------------------------------------------------.`
`+   |                                                                           |`
`+`
`+*/`
`+`
` mzd_t *mzd_init_window (mzd_t *m, rci_t lowr, rci_t lowc, rci_t highr, rci_t highc) {`
`   rci_t nrows, ncols;`
`   mzd_t *window;`
`     window->high_bitmask = __M4RI_LEFT_BITMASK((ncols + window->offset) % m4ri_radix);`
`     window->low_bitmask = __M4RI_RIGHT_BITMASK(m4ri_radix - window->offset);`
`   }`
`-  window->flags = m->flags & mzd_flag_multiple_blocks;`
`-  window->flags |= (window->offset == 0) ? mzd_flag_windowed_zerooffset : mzd_flag_nonzero_offset;`
`+  window->flags = (window->offset == 0) ? mzd_flag_windowed_zerooffset : mzd_flag_nonzero_offset;`
`   window->flags |= ((ncols + window->offset) % m4ri_radix == 0) ? mzd_flag_windowed_zeroexcess : mzd_flag_nonzero_excess;`
`   window->blockrows_log = m->blockrows_log;`
`-  window->blockrows_mask = m->blockrows_mask;`
`-  window->blocks = NULL;`
`-`
`+  //window->blockrows_mask = m->blockrows_mask;`
`+  int const skipped_blocks = (m->offset_vector + lowr * window->rowstride) / m->blocks[0].size;`
`+  assert(skipped_blocks == 0 || ((m->flags & mzd_flag_multiple_blocks)));`
`+  window->blocks = &m->blocks[skipped_blocks];`
`+  wi_t const wrd_offset = (lowc + m->offset) / m4ri_radix;`
`+  window->offset_vector = (m->offset_vector + lowr * window->rowstride + wrd_offset) - skipped_blocks * m->blocks[0].size;`
`   if(nrows)`
`     window->rows = (word**)m4ri_mmc_calloc(nrows + 1, sizeof(word*));`
`   else`
`     window->rows = NULL;`
`-  wi_t const offset = (lowc + m->offset) / m4ri_radix;`
`   for(rci_t i = 0; i < nrows; ++i) {`
`-    window->rows[i] = m->rows[lowr + i] + offset;`
`+    window->rows[i] = m->rows[lowr + i] + wrd_offset;`
`   }`
`-  `
`+  if (window->blocks[1].size)`
`+    window->flags |= m->flags & mzd_flag_multiple_blocks;`
`+`
`+  /* offset_vector is the distance from the start of the first block to the first word of the first row. */`
`+  assert(nrows == 0 || window->blocks[0].begin + window->offset_vector == window->rows[0]);`
`+`
`   __M4RI_DD_MZD(window);`
`   return window;`
` }`
` `
`-`
` void mzd_free(mzd_t *A) {`
`   if(A->rows)`
`     m4ri_mmc_free(A->rows, (A->nrows + 1) * sizeof(word*));`
`-  if(A->blocks) {`
`+  if(mzd_owns_blocks(A)) {`
`     int i;`
`     for(i = 0; A->blocks[i].size; ++i) {`
`       m4ri_mmc_free(A->blocks[i].begin, A->blocks[i].size);`
`       N->ncols -= P->offset;`
`       N->offset = P->offset;`
`       N->width = P->width;`
`-      N->flags |= mzd_flag_nonzero_offset;`
`+      N->flags |= (mzd_flag_nonzero_offset | mzd_flag_windowed_ownsblocks);`
`       N->low_bitmask &= m4ri_ffff << N->offset;`
`       if (N->width == 1)`
` 	N->high_bitmask = N->low_bitmask;`

# src/packedmatrix.h

`   wi_t rowstride;`
` `
`   /**`
`+   * Offset in words from start of block to first word.`
`+   *`
`+   * rows[0] = blocks[0].begin + offset_vector;`
`+   * This, together with rowstride, makes the rows array obsolete.`
`+   */`
`+`
`+  wi_t offset_vector;`
`+`
`+  /**`
`    * Booleans to speed up things.`
`    *`
`    * The bits have the following meaning:`
` `
`   int blockrows_log;`
` `
`+#if 0	// Commented out in order to keep the size of mzd_t 64 bytes (one cache line). This could be added back if rows was ever removed.`
`   /**`
`    * blockrows_mask = blockrows - 1;`
`    * where blockrows is the number of rows in one block, which is a power of 2.`
`    */`
` `
`   int blockrows_mask;`
`+#endif`
` `
`   /**`
`    * Mask for valid bits in the word with the highest index (width - 1).`
` static int const mzd_flag_nonzero_excess = 0x2;`
` static int const mzd_flag_windowed_zerooffset = 0x4;`
` static int const mzd_flag_windowed_zeroexcess = 0x8;`
`-static int const mzd_flag_multiple_blocks = 0x10;`
`+static int const mzd_flag_windowed_ownsblocks = 0x10;`
`+static int const mzd_flag_multiple_blocks = 0x20;`
` `
` /**`
`  * \brief Test if a matrix is windowed.`
` }`
` `
` /**`
`+ * \brief Test if this mzd_t should free blocks.`
`+ *`
`+ * \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.`
`+ */`
`+static inline int mzd_owns_blocks(mzd_t const *M) {`
`+  return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks)));`
`+}`
`+`
`+/**`
`  * \brief Create a new matrix of dimension r x c.`
`  *`
`  * Use mzd_free to kill it.`

# src/pls_mmpf.c

`     A->flags &= mzd_flag_multiple_blocks;`
`     A->flags |= (mzd_flag_windowed_zerooffset | mzd_flag_windowed_zeroexcess);`
`     A->high_bitmask = A->low_bitmask = m4ri_ffff;`
`+    /* No need to set mzd_flag_windowed_ownsblocks, because we won't free A until it's elements are restored below. */`
`   }`
` `
`   int curr_pos;`

# src/strassen.c

`   C->ncols = m4ri_radix;`
`   C->flags &= mzd_flag_multiple_blocks;`
`   C->flags |= (mzd_flag_windowed_zerooffset | mzd_flag_windowed_zeroexcess);`
`+  /* No need to set mzd_flag_windowed_ownsblocks, because we won't free C until it's elements are restored below. */`
`   assert(C->width == 1);`
`   C->low_bitmask = C->high_bitmask = m4ri_ffff;`
`   word const mask = __M4RI_MIDDLE_BITMASK(B->ncols, B->offset);`