Commits

CarloWood  committed 3314a04

Added mzd_t::offset_vector and made mzd_t::blocks non-zero also for windowed matrices.

After this patch, also for windowed matrices, you can find the first
word of the first row with M->blocks[0].begin + M->offset_vector.

Subsequently you can find the next rows by adding rowstride until
the resulting pointer >= M->blocks[n].end. Then increment n and
set the pointer to the first word in the next block with
M->blocks[n].begin + (M->offset_vector % M->rowstride).

For example, to run over all rows of an arbitrary matrix:

rci_t r = 0; // Only used for assertion that we checked all rows.
int n = 0; // Current block number.
rci_t counted = 0; // Number of rows processed already.
word* row = M->blocks[n].begin + M->offset_vector;

// Run over all blocks.
while(1) {
// Number of rows till the end of the current block.
int row_count = (M->blocks[n].end + M->rowstride - 1 - row) / M->rowstride;
// Don't go passed the end of the matrix.
row_count = MIN(row_count, M->nrows - counted);
// Keep track of how many we already processed.
counted += row_count;

// START INNER LOOP
while (row_count--) {
assert(row == M->rows[r++]); // Assertion to check it works and count the total number of rows.
row += M->rowstride;
}
// END INNER LOOP

if (!M->blocks[++n].size) // Was this the last block?
break;

// Position row at first word of first row in the next block.
row = M->blocks[n].begin + (M->offset_vector % M->rowstride);
}

// Make sure we checked all rows.
assert(M->ncols == 0 || r == M->nrows);

  • Participants
  • Parent commits ca4dc22

Comments (0)

Files changed (5)

File src/debug_dump.c

   assert(M->width * m4ri_radix >= M->ncols + M->offset);
   assert((M->width - 1) * m4ri_radix < M->ncols + M->offset);
   assert(M->width < mzd_paddingwidth || (M->rowstride & 1) == 0);
-  assert((M->blockrows_mask + 1) == (1 << M->blockrows_log));
+  //assert((M->blockrows_mask + 1) == (1 << M->blockrows_log));
   assert((1 << M->blockrows_log) * M->rowstride <= __M4RI_MAX_MZD_BLOCKSIZE);
   assert((1 << M->blockrows_log) * M->rowstride > __M4RI_MAX_MZD_BLOCKSIZE / 2);
   assert((M->width > 1 && M->low_bitmask == __M4RI_RIGHT_BITMASK(m4ri_radix - M->offset)) ||

File src/packedmatrix.c

   }
   A->flags = (A->high_bitmask != m4ri_ffff) ? mzd_flag_nonzero_excess : 0;
   A->offset = 0;
+  A->offset_vector = 0;
 
   A->rows = (word**)m4ri_mmc_calloc(r + 1, sizeof(word*)); // We're overcomitting here.
 
     while(blockrows >>= 1)
       A->blockrows_log++;
     blockrows = 1 << A->blockrows_log;
-    A->blockrows_mask = blockrows - 1;
+    //A->blockrows_mask = blockrows - 1;
+    int const blockrows_mask = blockrows - 1;
     int const nblocks = (r + blockrows - 1) / blockrows;
     A->flags |= (nblocks > 1) ? mzd_flag_multiple_blocks : 0;
     A->blocks = (mzd_block_t*)m4ri_mmc_calloc(nblocks + 1, sizeof(mzd_block_t));
     }
 
     for(rci_t i = 0; i < A->nrows; ++i) {
-      A->rows[i] = A->blocks[i >> A->blockrows_log].begin + (i & A->blockrows_mask) * A->rowstride;
+      A->rows[i] = A->blocks[i >> A->blockrows_log].begin + (i & blockrows_mask) * A->rowstride;
 #ifdef M4RI_WRAPWORD
       word::init_array(A->rows[i], A->width);
 #endif
   return A;
 }
 
+/*
+   Explanation of offset_vector (in words)
+
+   <------------------------------- row_stride (in words)--------------------->
+   .---------------------------------------------------------------------------.  <-- m->blocks[0].begin   ^
+   |                                                                          /|                           |
+   |                                                        m->offset_vector_/ |                           |
+   |                                                                        /  |                           |
+   |  .--------------------------------------------------------------------v<--|---- m->rows[0]            |_ skipped_blocks (in blocks)
+   |  |m (also a window)             ^                                     |   |                           |
+   |  |                              |                                     |   |                           |
+   `---------------------------------|-----------------------------------------'                           v
+   .---------------------------------|----------------------------------------_.  <-- m->blocks[1].begin <-- windows.blocks[0].begin
+   |  |                          lowr|                                     |_^ |
+   |  |                              |            window->offset_vector _-^|   |
+   |  |                              v                               _-^   |   |                     
+   |  |  .----------------------------------------------------------v<--.  |<--|---- m->rows[lowr]
+   |  |  |window                                                    |    `-|---|---- window->rows[0]
+   |  |  |                                                          |      |   |
+   `---------------------------------------------------------------------------'
+   .---------------------------------------------------------------------------.  <-- m->blocks[2].begin <-- windows.blocks[1].begin
+   |  |  |                                                          |      |   |
+   |  |  |                                                          | lowc |   |
+   |  |  |                                                          |<---->|   |
+   |  |  |                                                          |   \__|___|__ also wrd_offset (in words)
+   |  |  `----------------------------------------------------------'      |   |
+   |  `--------------------------------------------------------------------'   |
+   `---------------------------------------------------------------------------'
+   .---------------------------------------------------------------------------.
+   |                                                                           |
+
+*/
+
 mzd_t *mzd_init_window (mzd_t *m, rci_t lowr, rci_t lowc, rci_t highr, rci_t highc) {
   rci_t nrows, ncols;
   mzd_t *window;
     window->high_bitmask = __M4RI_LEFT_BITMASK((ncols + window->offset) % m4ri_radix);
     window->low_bitmask = __M4RI_RIGHT_BITMASK(m4ri_radix - window->offset);
   }
-  window->flags = m->flags & mzd_flag_multiple_blocks;
-  window->flags |= (window->offset == 0) ? mzd_flag_windowed_zerooffset : mzd_flag_nonzero_offset;
+  window->flags = (window->offset == 0) ? mzd_flag_windowed_zerooffset : mzd_flag_nonzero_offset;
   window->flags |= ((ncols + window->offset) % m4ri_radix == 0) ? mzd_flag_windowed_zeroexcess : mzd_flag_nonzero_excess;
   window->blockrows_log = m->blockrows_log;
-  window->blockrows_mask = m->blockrows_mask;
-  window->blocks = NULL;
-
+  //window->blockrows_mask = m->blockrows_mask;
+  int const skipped_blocks = (m->offset_vector + lowr * window->rowstride) / m->blocks[0].size;
+  assert(skipped_blocks == 0 || ((m->flags & mzd_flag_multiple_blocks)));
+  window->blocks = &m->blocks[skipped_blocks];
+  wi_t const wrd_offset = (lowc + m->offset) / m4ri_radix;
+  window->offset_vector = (m->offset_vector + lowr * window->rowstride + wrd_offset) - skipped_blocks * m->blocks[0].size;
   if(nrows)
     window->rows = (word**)m4ri_mmc_calloc(nrows + 1, sizeof(word*));
   else
     window->rows = NULL;
-  wi_t const offset = (lowc + m->offset) / m4ri_radix;
   for(rci_t i = 0; i < nrows; ++i) {
-    window->rows[i] = m->rows[lowr + i] + offset;
+    window->rows[i] = m->rows[lowr + i] + wrd_offset;
   }
-  
+  if (window->blocks[1].size)
+    window->flags |= m->flags & mzd_flag_multiple_blocks;
+
+  /* offset_vector is the distance from the start of the first block to the first word of the first row. */
+  assert(nrows == 0 || window->blocks[0].begin + window->offset_vector == window->rows[0]);
+
   __M4RI_DD_MZD(window);
   return window;
 }
 
-
 void mzd_free(mzd_t *A) {
   if(A->rows)
     m4ri_mmc_free(A->rows, (A->nrows + 1) * sizeof(word*));
-  if(A->blocks) {
+  if(mzd_owns_blocks(A)) {
     int i;
     for(i = 0; A->blocks[i].size; ++i) {
       m4ri_mmc_free(A->blocks[i].begin, A->blocks[i].size);
       N->ncols -= P->offset;
       N->offset = P->offset;
       N->width = P->width;
-      N->flags |= mzd_flag_nonzero_offset;
+      N->flags |= (mzd_flag_nonzero_offset | mzd_flag_windowed_ownsblocks);
       N->low_bitmask &= m4ri_ffff << N->offset;
       if (N->width == 1)
 	N->high_bitmask = N->low_bitmask;

File src/packedmatrix.h

   wi_t rowstride;
 
   /**
+   * Offset in words from start of block to first word.
+   *
+   * rows[0] = blocks[0].begin + offset_vector;
+   * This, together with rowstride, makes the rows array obsolete.
+   */
+
+  wi_t offset_vector;
+
+  /**
    * Booleans to speed up things.
    *
    * The bits have the following meaning:
 
   int blockrows_log;
 
+#if 0	// Commented out in order to keep the size of mzd_t 64 bytes (one cache line). This could be added back if rows was ever removed.
   /**
    * blockrows_mask = blockrows - 1;
    * where blockrows is the number of rows in one block, which is a power of 2.
    */
 
   int blockrows_mask;
+#endif
 
   /**
    * Mask for valid bits in the word with the highest index (width - 1).
 static int const mzd_flag_nonzero_excess = 0x2;
 static int const mzd_flag_windowed_zerooffset = 0x4;
 static int const mzd_flag_windowed_zeroexcess = 0x8;
-static int const mzd_flag_multiple_blocks = 0x10;
+static int const mzd_flag_windowed_ownsblocks = 0x10;
+static int const mzd_flag_multiple_blocks = 0x20;
 
 /**
  * \brief Test if a matrix is windowed.
 }
 
 /**
+ * \brief Test if this mzd_t should free blocks.
+ *
+ * \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.
+ */
+static inline int mzd_owns_blocks(mzd_t const *M) {
+  return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks)));
+}
+
+/**
  * \brief Create a new matrix of dimension r x c.
  *
  * Use mzd_free to kill it.

File src/pls_mmpf.c

     A->flags &= mzd_flag_multiple_blocks;
     A->flags |= (mzd_flag_windowed_zerooffset | mzd_flag_windowed_zeroexcess);
     A->high_bitmask = A->low_bitmask = m4ri_ffff;
+    /* No need to set mzd_flag_windowed_ownsblocks, because we won't free A until it's elements are restored below. */
   }
 
   int curr_pos;

File src/strassen.c

   C->ncols = m4ri_radix;
   C->flags &= mzd_flag_multiple_blocks;
   C->flags |= (mzd_flag_windowed_zerooffset | mzd_flag_windowed_zeroexcess);
+  /* No need to set mzd_flag_windowed_ownsblocks, because we won't free C until it's elements are restored below. */
   assert(C->width == 1);
   C->low_bitmask = C->high_bitmask = m4ri_ffff;
   word const mask = __M4RI_MIDDLE_BITMASK(B->ncols, B->offset);