Commits

CarloWood committed 9b8dcab

Add --enable-debug-dump.

When configured with --enable-debug-dump, print a trace
of (hash values of) output values and their function/location,
upon leaving any function that does something significant.

This can be used to quickly find the function that behaves
differently when some patch breaks the testsuite.

Comments (0)

Files changed (17)

 
 lib_LTLIBRARIES = libm4ri.la
 
-libm4ri_la_SOURCES = src/brilliantrussian.c src/misc.c src/packedmatrix.c src/grayflex.c src/strassen.c src/permutation.c src/trsm.c src/pls.c src/solve.c src/pls_mmpf.c src/echelonform.c src/mmc.c
+libm4ri_la_SOURCES = src/brilliantrussian.c src/misc.c src/packedmatrix.c src/grayflex.c src/strassen.c src/permutation.c src/trsm.c src/pls.c src/solve.c src/pls_mmpf.c src/echelonform.c src/mmc.c src/debug_dump.c
 BUILT_SOURCES = src/m4ri_config.h
 
 pkgincludesubdir = $(includedir)/m4ri
 AC_SUBST(M4RI_HAVE_OPENMP)
 
 # Debugging support
-AC_ARG_ENABLE(debug, [  --enable-debug          Enable assert() statements for debugging.])
+AC_ARG_ENABLE([debug],
+	AS_HELP_STRING([--enable-debug], [Enable assert() statements for debugging.]))
 
 if test "x$enable_debug" = x"yes"; then
    DEBUG_FLAGS="-g"
    AC_DEFINE(NDEBUG,1,[Define whether debugging is enabled])
 fi
 
+AC_ARG_ENABLE([debug-dump],
+        AS_HELP_STRING([--enable-debug-dump], [Dump output at exit of every function.]))
+
+if test "x$enable_debug_dump" = "xyes"; then
+  M4RI_DEBUG_DUMP=1
+else
+  M4RI_DEBUG_DUMP=0
+fi
+AC_SUBST(M4RI_DEBUG_DUMP)
+
 # For the testsuite. Detect if PAPI is installed. See http://icl.cs.utk.edu/papi/ .
 
 if test -z "$m4ri_config_papi"; then

src/brilliantrussian.c

       }
     }
     if (found == 0) {
-      return j - c;
+      break;
     }
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(j - c);
   return j - c;
 }
 
       }
     }
     if (found == 0) {
-      return j - c;
+      break;
     }
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(j - c);
   return j - c;
 }
 
     }
     ++start_row;
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(k);
   return k;
 }
 
       dst[j] = src[j];
     }
   }
+  __M4RI_DD_MZD(A);
 }
 
 void mzd_make_table(mzd_t const *M, rci_t r, rci_t c, int k, mzd_t *T, rci_t *L)
     case 1:  *ti++ = (*m++ ^ *ti1++) & mask_end;
     }
   }
+  __M4RI_DD_MZD(T);
+  __M4RI_DD_RCI_ARRAY(L, twokay);
 }
 
 void mzd_process_rows(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, mzd_t const *T, rci_t const *L) {
         } while (--n > 0);
       }
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows2(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, mzd_t const *T0, rci_t const *L0, mzd_t const *T1, rci_t const *L1) {
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows3(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows4(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows5(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows6(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 rci_t _mzd_echelonize_m4ri(mzd_t *A, int const full, int k, int heuristic, double const threshold) {
   mzd_free(T5);
   m4ri_mm_free(L5);
   mzd_free(U);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }
 
   mzd_free(T5);
   m4ri_mm_free(L5);
   mzd_free(U);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }
 
   
   mzd_free(big);
   
+  __M4RI_DD_MZD(answer);
   return answer;
 }
 
   mzd_free(T8);
 #endif
   m4ri_mm_free(buffer);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 //#undef __M4RI_M4RM_GRAY8
   m4ri_mm_free(L6);
   m4ri_mm_free(L7);
 #endif
+
+  __M4RI_DD_MZD(B);
 }
+/******************************************************************************
+*
+*            M4RI: Linear Algebra over GF(2)
+*
+*    Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
+*
+*  Distributed under the terms of the GNU General Public License (GPL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "packedmatrix.h"
+#include "permutation.h"
+
+#if __M4RI_DEBUG_DUMP
+
+/* Number of dump records printed so far; used to label each record. */
+static unsigned long dd_sequence_number = 0;
+
+/**
+ * Print the common prefix of a dump record: the current sequence number
+ * followed by the function name and source location of the dump site.
+ * The sequence number is incremented afterwards. No newline is printed;
+ * the caller is expected to complete the line.
+ */
+static void entry(char const* function, char const* file, int line)
+{
+  /* The counter is unsigned long, so it must be printed with %lu (not %ld). */
+  printf("Sequence#: %lu; %s @ %s:%d; ", dd_sequence_number, function, file, line);
+  ++dd_sequence_number;
+}
+
+/**
+ * XOR together the `wide` words starting at `rowptr` and return the result.
+ * Returns 0 when `wide` is not positive.
+ */
+static word calculate_hash(word const* rowptr, wi_t wide)
+{
+  unsigned long long folded = 0;
+  for (wi_t idx = 0; idx < wide; ++idx)
+    folded ^= rowptr[idx];
+  return folded;
+}
+
+/**
+ * Rotate `w` left by `shift` bit positions within a word of m4ri_radix bits.
+ *
+ * `shift` must satisfy 0 <= shift < m4ri_radix; the callers in this file
+ * pass an index reduced modulo m4ri_radix.
+ */
+static inline word rotate_word(word w, int shift)
+{
+  /* A zero rotation is handled separately: the complementary shift would be
+   * by the full word width, which is undefined behaviour in C. */
+  if (shift == 0)
+    return w;
+  /* The bits shifted out on the left re-enter on the right; the right-shift
+   * amount is derived from `shift`, not from the value being rotated. */
+  return (w << shift) | (w >> (m4ri_radix - shift));
+}
+
+/**
+ * Dump a plain int output value.
+ *
+ * Prints the record prefix (sequence number, function, file, line)
+ * followed by the value of `i` and a newline.
+ */
+void m4ri_dd_int(char const* function, char const* file, int line, int i)
+{
+  entry(function, file, line);  /* record prefix, no newline yet */
+  printf("int: %d\n", i);
+}
+
+/**
+ * Dump a single rci_t output value.
+ *
+ * Prints the record prefix (sequence number, function, file, line)
+ * followed by the value of `rci` and a newline.
+ */
+void m4ri_dd_rci(char const* function, char const* file, int line, rci_t rci)
+{
+  entry(function, file, line);  /* record prefix, no newline yet */
+  printf("rci: %d\n", rci);
+}
+
+/**
+ * Dump a hash of an array of `len` rci_t values.
+ *
+ * Each element is rotated by its index (modulo m4ri_radix) before being
+ * XORed into the hash, so the result depends on element order.
+ */
+void m4ri_dd_rci_array(char const* function, char const* file, int line, rci_t *rciptr, int len)
+{
+  entry(function, file, line);
+  unsigned long long digest = 0;
+  int pos = 0;
+  while (pos < len) {
+    digest ^= rotate_word(rciptr[pos], pos % m4ri_radix);
+    ++pos;
+  }
+  printf("rci array (size %d) hash: %llx\n", len, digest);
+}
+
+/**
+ * Dump the XOR hash of `wide` raw words starting at `rowptr`.
+ */
+void m4ri_dd_rawrow(char const* function, char const* file, int line, word const* rowptr, wi_t wide)
+{
+  entry(function, file, line);
+  unsigned long long const digest = calculate_hash(rowptr, wide);
+  printf("raw row (%d words) hash: %llx\n", wide, digest);
+}
+
+/**
+ * Dump the XOR hash of row `row` of matrix `M`, taken over M->width words.
+ */
+void m4ri_dd_row(char const* function, char const* file, int line, mzd_t const* M, rci_t row)
+{
+  entry(function, file, line);
+  word const* const row_words = M->rows[row];
+  unsigned long long const digest = calculate_hash(row_words, M->width);
+  printf("row %d hash: %llx\n", row, digest);
+}
+
+/**
+ * Dump a hash of the whole matrix `M`.
+ *
+ * Every row is XOR-hashed over M->width words; the per-row hash is rotated
+ * by the row index (modulo m4ri_radix) and XORed into the matrix hash.
+ */
+void m4ri_dd_mzd(char const* function, char const* file, int line, mzd_t const* M)
+{
+  entry(function, file, line);
+  unsigned long long digest = 0;
+  for (rci_t row_idx = 0; row_idx < M->nrows; ++row_idx) {
+    word const row_hash = calculate_hash(M->rows[row_idx], M->width);
+    digest ^= rotate_word(row_hash, row_idx % m4ri_radix);
+  }
+  printf("mzd hash: %llx\n", digest);
+}
+
+/**
+ * Dump a hash of permutation `P`.
+ *
+ * Each of the P->length entries of P->values is rotated by its index
+ * (modulo m4ri_radix) and XORed into the hash.
+ */
+void m4ri_dd_mzp(char const* function, char const* file, int line, mzp_t const* P)
+{
+  entry(function, file, line);
+  unsigned long long digest = 0;
+  rci_t pos = 0;
+  while (pos < P->length) {
+    digest ^= rotate_word(P->values[pos], pos % m4ri_radix);
+    ++pos;
+  }
+  printf("mzp hash: %llx\n", digest);
+}
+
+#endif
+/**
+ * \file debug_dump.h
+ *
+ * \brief Debug utility
+ * 
+ * \author Carlo Wood <carlo@alinoe.com>
+ *
+ * To enable dumping of output per function, configure the library with --enable-debug-dump.
+ */
+/******************************************************************************
+*
+*                 M4RI: Linear Algebra over GF(2)
+*
+*    Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
+*
+*  Distributed under the terms of the GNU General Public License (GPL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
+#ifndef M4RI_DOXYGEN // NOTE(review): presumably hides these internals from the Doxygen run - confirm
+
+#if __M4RI_DEBUG_DUMP // nonzero: declare the dump functions and route the macros to them
+
+struct mzd_t; // forward declaration: only pointers to it are used in the prototypes below
+struct mzp_t; // forward declaration: only pointers to it are used in the prototypes below
+
+extern void m4ri_dd_int(char const* function, char const* file, int line, int i);
+extern void m4ri_dd_rci(char const* function, char const* file, int line, rci_t rci);
+extern void m4ri_dd_rci_array(char const* function, char const* file, int line, rci_t *rciptr, int len);
+extern void m4ri_dd_rawrow(char const* function, char const* file, int line, word const* rowptr, wi_t wide);
+extern void m4ri_dd_row(char const* function, char const* file, int line, struct mzd_t const* M, rci_t row);
+extern void m4ri_dd_mzd(char const* function, char const* file, int line, struct mzd_t const* M);
+extern void m4ri_dd_mzp(char const* function, char const* file, int line, struct mzp_t const* P);
+
+#define __M4RI_DD_INT(i) m4ri_dd_int(__FUNCTION__, __FILE__, __LINE__, i) // each macro passes the call site's function, file and line
+#define __M4RI_DD_RCI(rci) m4ri_dd_rci(__FUNCTION__, __FILE__, __LINE__, rci)
+#define __M4RI_DD_RCI_ARRAY(rciptr, len) m4ri_dd_rci_array(__FUNCTION__, __FILE__, __LINE__, rciptr, len)
+#define __M4RI_DD_RAWROW(rowptr, wide) m4ri_dd_rawrow(__FUNCTION__, __FILE__, __LINE__, rowptr, wide)
+#define __M4RI_DD_ROW(M, row) m4ri_dd_row(__FUNCTION__, __FILE__, __LINE__, M, row)
+#define __M4RI_DD_MZD(M) m4ri_dd_mzd(__FUNCTION__, __FILE__, __LINE__, M)
+#define __M4RI_DD_MZP(P) m4ri_dd_mzp(__FUNCTION__, __FILE__, __LINE__, P)
+
+#else // __M4RI_DEBUG_DUMP
+
+#define __M4RI_DD_INT(i) // dumping disabled: every macro expands to nothing
+#define __M4RI_DD_RCI(rci)
+#define __M4RI_DD_RCI_ARRAY(rciptr, len)
+#define __M4RI_DD_RAWROW(rowptr, wide)
+#define __M4RI_DD_ROW(M, row)
+#define __M4RI_DD_MZD(M)
+#define __M4RI_DD_MZP(P)
+
+#endif // __M4RI_DEBUG_DUMP
+
+#endif // M4RI_DOXYGEN

src/echelonform.c

   
   mzp_free(P);
   mzp_free(Q);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }

src/m4ri_config.h.in

 #define __M4RI_HAVE_POSIX_MEMALIGN	@M4RI_HAVE_POSIX_MEMALIGN@
 #define __M4RI_HAVE_SSE2		@M4RI_HAVE_SSE2@
 #define __M4RI_HAVE_OPENMP		@M4RI_HAVE_OPENMP@
+#define __M4RI_DEBUG_DUMP		@M4RI_DEBUG_DUMP@
 
 // Helper macros.
 #define __M4RI_USE_MM_MALLOC		(__M4RI_HAVE_MM_MALLOC && __M4RI_HAVE_SSE2)

src/packedmatrix.c

     window->rows[i] = m->rows[lowr + i] + offset;
   }
   
+  __M4RI_DD_MZD(window);
   return window;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(M);
+  __M4RI_DD_RCI(pivots);
   return pivots;
 }
 
       DST->rows[64+k+j][1] ^= t[3];			// D
     }
   }
+
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
         mzd_write_bit(DST, j, i, mzd_read_bit(A, i, j));
       }
     }
+    __M4RI_DD_MZD(DST);
     return DST;
   }
 
       collect = 0;
     }
   }
+
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
   mzd_free_window(AT); mzd_free_window(CT);
   mzd_free_window(BT); mzd_free_window(DT);
   
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     for(rci_t j = 0; j < n; ++j)
       if (mzd_read_bit(v,i,j))
         mzd_combine(C,i,0, C,i,0, A,j,0);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       A->rows[i][width] ^= (A->rows[i][width] ^ m4ri_random_word()) & mask_end;
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 void mzd_set_ui( mzd_t *A, unsigned int value) {
     }
   }
 
-  if(value % 2 == 0)
+  if(value % 2 == 0) {
+    __M4RI_DD_MZD(A);
     return;
+  }
 
   rci_t const stop = MIN(A->nrows, A->ncols);
   for (rci_t i = 0; i < stop; ++i) {
     mzd_write_bit(A, i, i, 1);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 int mzd_equal(mzd_t const *A, mzd_t const *B) {
       m4ri_die("mzd_copy: completely unaligned copy not implemented yet.");
     }
   }
+
+  __M4RI_DD_MZD(N);
   return N;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       dst_truerow[j] = src_truerow[j]; 
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
   INV = mzd_submatrix(INV, H, 0, A->ncols, A->nrows, 2 * A->ncols);
 
   mzd_free(H);
+
+  __M4RI_DD_MZD(INV);
   return INV;
 }
 
     for(rci_t i = 0; i < nrows; ++i) {
       mzd_combine_weird(C,i,0, A,i,0, B,i,0);
     }
+    __M4RI_DD_MZD(C);
     return C;
   }
 
       mzd_combine_even(C,i,0, A,i,0, B,i,0);
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       }
     }
   }
+  __M4RI_DD_MZD(S);
   return S;
 }
 
       register word x = ((b >> a_bit) ^ (b >> b_bit)) & m4ri_one; // XOR temporary
       *base = b ^ ((x << a_bit) | (x << b_bit));
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
     }
   }
 
+  __M4RI_DD_MZD(M);
 }
 
-
 int mzd_is_zero(mzd_t const *A) {
   /* Could be improved: stopping as the first non zero value is found (status!=0) */
   rci_t const mb = A->nrows;
     b[c / m4ri_radix] &= __M4RI_LEFT_BITMASK(m4ri_radix - rest);
     b[c / m4ri_radix] |= temp;
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j) {
   } else {
     b[0] = (b[0] & ~mask_begin) | (a[0] & mask_begin & mask_end) | (b[0] & ~mask_end);
   }
+
+  __M4RI_DD_ROW(B, i);
 }
 
 
   for (wi_t i = startblock + 1; i < M->width; ++i) {
     M->rows[row][i] = 0;
   }
+
+  __M4RI_DD_ROW(M, row);
 }
 
 
-int mzd_find_pivot(mzd_t const *A, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c) { 
+int mzd_find_pivot(mzd_t const *A, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c) {
   assert(A->offset == 0);
   rci_t const nrows = A->nrows;
   rci_t const ncols = A->ncols;
             break;
           }
         }
+	__M4RI_DD_RCI(*r);
+	__M4RI_DD_RCI(*c);
+	__M4RI_DD_INT(1);
         return 1;
       }
     }
           break;
         }
       }
+      __M4RI_DD_RCI(*r);
+      __M4RI_DD_RCI(*c);
+      __M4RI_DD_INT(1);
       return 1;
     }
     /* handle complete words */
             break;
           }
         }
+	__M4RI_DD_RCI(*r);
+	__M4RI_DD_RCI(*c);
+	__M4RI_DD_INT(1);
         return 1;
       }
     }
           break;
         }
       }
+      __M4RI_DD_RCI(*r);
+      __M4RI_DD_RCI(*c);
+      __M4RI_DD_INT(1);
       return 1;
     }
   }
+  __M4RI_DD_RCI(*r);
+  __M4RI_DD_RCI(*c);
+  __M4RI_DD_INT(0);
   return 0;
 }
 
     for (wi_t j = 1; j < end; ++j)
       tmp |= row[j];
     tmp |= row[end] & mask_end;
-    if(tmp)
+    if(tmp) {
+      __M4RI_DD_INT(i + 1);
       return i + 1;
+    }
   }
+  __M4RI_DD_INT(0);
   return 0;
 }

src/packedmatrix.h

 #endif
 
 #include "misc.h"
+#include "debug_dump.h"
 
 #if __M4RI_HAVE_SSE2
 /**
  * The most fundamental data type in this library.
  */
 
-typedef struct {
+typedef struct mzd_t {
   /**
    * Contains pointers to the actual blocks of memory containing the
    * values packed into words of size m4ri_radix.
   tmp = (a[width] ^ b[width]) & mask_end;
   a[width] ^= tmp;
   b[width] ^= tmp;
+
+  __M4RI_DD_ROW(M, rowa);
+  __M4RI_DD_ROW(M, rowb);
 }
 
 /**
     a[0] ^= tmp;
     b[0] ^= tmp;
   }
+
+  __M4RI_DD_ROW(M, rowa);
+  __M4RI_DD_ROW(M, rowb);
 }
 
 /**
       x |= x << coldiff;			/* Duplicate this bit at both column positions. */
       *vp = v ^ x;				/* Swap column bits and store result. */
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
     *(base + a_word) = a;
     *(base + b_word) = b;
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 /**
    * that we last accessed, in the previous loop.
    */
   dst[i - 1] ^= src[i - 1] & ~mask_end;
+
+  __M4RI_DD_ROW(M, dstrow);
 }
 
 /**
     mzd_clear_bits(C, c_row, i, (C->ncols - i));
     mzd_xor_bits(C, c_row, i, (C->ncols - i), tmp);
   }
-  return;
+
+  __M4RI_DD_MZD(C);
 }
 
 /**
   }
 
   *a ^= *b & __M4RI_LEFT_BITMASK(A->ncols%m4ri_radix);
-  return;
+
+  __M4RI_DD_MZD(A);
 }
 
 
     }
   }
   *c ^= ((*a ^ *b ^ *c) & __M4RI_LEFT_BITMASK(C->ncols%m4ri_radix));
-  return;
+
+  __M4RI_DD_MZD(C);
 }
 
 

src/permutation.c

   mzp_t *window = (mzp_t *)m4ri_mm_malloc(sizeof(mzp_t));
   window->values = P->values + begin;
   window->length = end - begin;
+  __M4RI_DD_MZP(window);
   return window;
 }
 
       Arow[a_word] |= value;
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
+
 /**
  * Implements both apply_p_right and apply_p_right_trans.
  */
   m4ri_mm_free(permutation);
   m4ri_mm_free(write_mask);
   mzd_free(B);
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_apply_p_right_trans(mzd_t *A, mzp_t const *P) {
 /*     assert(P->values[i] >= i); */
 /*     mzd_col_swap(A, i, P->values[i]); */
 /*   } */
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_apply_p_right(mzd_t *A, mzp_t const *P) {
 /*     assert(P->values[i] >= i); */
 /*     mzd_col_swap(A, i, P->values[i]); */
 /*   } */
+
+  __M4RI_DD_MZD(A);
 }
 
 
       mzd_col_swap_in_rows(A, i, P->values[i], r, MIN(row_bound, i));
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_compress_l(mzd_t *A, rci_t r1, rci_t n1, rci_t r2) {
   }
  
 #endif
+
+  __M4RI_DD_MZD(A);
 }
 

src/permutation.h

  * \brief Permutations.
  */
 
-typedef struct {
+typedef struct mzp_t {
   /**
    * The swap operations in LAPACK format.
    */
     mzd_free_window(A10);
     mzd_free_window(A11);
 
+    __M4RI_DD_MZD(A);
+    __M4RI_DD_MZP(P);
+    __M4RI_DD_MZP(Q);
+    __M4RI_DD_RCI(r1 + r2);
     return r1 + r2;
   }
 }
 
-
 rci_t _mzd_pluq_naive(mzd_t *A, mzp_t *P, mzp_t *Q)  {
   rci_t curr_pos = 0;
   for (curr_pos = 0; curr_pos < A->ncols; ) {
     P->values[i] = i;
   for (rci_t i = curr_pos; i < A->ncols; ++i)
     Q->values[i] = i;
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(curr_pos);
   return curr_pos;
 }
  
     }
   }
 
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(row_pos);
   return row_pos;
 }
-
-
   A->ncols = ncols;
   A->width = width;
 
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_INT(curr_pos);
   return curr_pos;
 }
 
     word const correction = __M4RI_CONVERT_TO_WORD(m4ri_codebook[k]->ord[i]);
     mzd_xor_bits(T, i,c, k, correction);
   }
+
+  __M4RI_DD_MZD(T);
+  __M4RI_DD_RCI_ARRAY(Le, twokay);
+  __M4RI_DD_RCI_ARRAY(Lm, twokay);
 }
 
 void mzd_process_rows2_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine2(m0,t0,t1,wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows3_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine3(m0,t0,t1,t2,wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows4_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine4(m0, t0, t1, t2, t3, wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void _mzd_finish_pls_done_pivots(mzd_t *A, mzp_t const *P, rci_t const start_row, rci_t const start_col, wi_t const addblock, int const k) {
       }
     }
   }
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
 }
 
 void _mzd_finish_pls_done_rest1(mzd_t *A,
     word *t = A->rows[i] + addblock;
     _mzd_combine(t, s0, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine2(t, s0, s1, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine3(t, s0, s1, s2, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine4(t, s0, s1, s2, s3, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 /* extract U from A for table creation */
   for(rci_t i = 0; i < k; ++i)
     for(rci_t j = startcol; j < c + i; ++j) 
       mzd_write_bit(U, i, j,  0);
+
+  __M4RI_DD_MZD(U);
   return U;
 }
 
   m4ri_mm_free(E2);  m4ri_mm_free(M2);
   m4ri_mm_free(E3);  m4ri_mm_free(M3);
   m4ri_mm_free(done);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(curr_row);
   return curr_row;
 }
 
   mzd_apply_p_left_trans(B, Q);
 
   /* P L U Q B5 = B1 */
+  __M4RI_DD_MZD(B); 
+  __M4RI_DD_INT(retval);
   return retval;
 }
 
   
   mzp_free(P);
   mzp_free(Q);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZD(B);
   return retval;
 }
 
   if (r == A->ncols) {
     mzp_free(P);
     mzp_free(Q);
+    __M4RI_DD_MZD(A);
     return NULL;
   }
 
   mzd_free_window(RU);
   mzd_free_window(U);
   mzd_free_window(B);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZD(R);
   return R;
 }
   mzd_free(X0);
   mzd_free(X1);
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     mzd_free_window(C_bulk);
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
   mzd_free_window(C00); mzd_free_window(C01);
   mzd_free_window(C10); mzd_free_window(C11);
   
+  __M4RI_DD_MZD(C);
   return C;
 }
 #endif
   mzd_free(X1);
   mzd_free(X2);
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     mzd_free_window(C_bulk);
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       mzd_free_window ((mzd_t*)B0); mzd_free_window ((mzd_t*)B1);
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     *c ^= par;//m4ri_parity64(parity);
   }
   mzd_free (BT);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     m4ri_die("mzd_addmul: C (%d x %d) has wrong dimensions, expected (%d x %d)\n",
 	     C->nrows, C->ncols, A->nrows, B->ncols);
   }
-  if(A->nrows == 0 || A->ncols == 0 || B->ncols == 0)
+  if(A->nrows == 0 || A->ncols == 0 || B->ncols == 0) {
+    __M4RI_DD_MZD(C);
     return C;
+  }
 
   C = _mzd_addmul(C, A, B, cutoff);
+  __M4RI_DD_MZD(C);
   return C;
 }
-
-
   mzd_free_window((mzd_t*)U00);
   mzd_free_window((mzd_t*)U01);
   mzd_free_window((mzd_t*)U11);
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_weird(mzd_t const *U, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i + offset);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_even(mzd_t const *U, mzd_t *B, const int cutoff) {
   mzd_free_window((mzd_t*)U00);
   mzd_free_window((mzd_t*)U01);
   mzd_free_window((mzd_t*)U11);
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_base(mzd_t const *U, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_weird(mzd_t const *L, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep ][0], i + offset);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_even(mzd_t const *L, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_base(mzd_t const *L, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_left_weird(mzd_t const *L, mzd_t *B) {
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_left_even(mzd_t const *L, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 /*****************
     mzd_free_window((mzd_t*)U01);
     mzd_free_window((mzd_t*)U11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_left_weird (mzd_t const *U, mzd_t *B) {
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_left_even(mzd_t const *U, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)U01);
     mzd_free_window((mzd_t*)U11);
   }
+
+  __M4RI_DD_MZD(B);
 }
  */
 
 static inline void _mzd_combine8(word *c, word const *t1, word const *t2, word const *t3, word const *t4,
-                                 word const *t5, word const *t6, word const *t7, word const *t8, wi_t wide) {
+                                 word const *t5, word const *t6, word const *t7, word const *t8, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t8 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
   for(wi_t i = 0; i < wide; ++i) {
     c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] ^ t5[i] ^ t6[i] ^ t7[i] ^ t8[i];
   }
+
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine4(word *c, word const *t1, word const *t2, word const *t3, word const *t4, wi_t wide) {
+static inline void _mzd_combine4(word *c, word const *t1, word const *t2, word const *t3, word const *t4, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t4 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t4 = (word*)__t4;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++ ^ *t3++ ^ *t4++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine3(word *c, word const *t1, word const *t2, word const *t3, wi_t wide) {
+static inline void _mzd_combine3(word *c, word const *t1, word const *t2, word const *t3, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t3 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t3 = (word*)__t3;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++ ^ *t3++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 
  *
  */
 
-static inline void _mzd_combine2(word *c, word const *t1, word const *t2, wi_t wide) {
+static inline void _mzd_combine2(word *c, word const *t1, word const *t2, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t2 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t2 = (word*)__t2;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine(word *c, word const *t1, wi_t wide) {
+static inline void _mzd_combine(word *c, word const *t1, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming c, t1 are alligned the same way */
 
   t1 = (word*)__t1;
   wide = ((sizeof(word) * wide) % 16) / sizeof(word);
 
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
 
   wi_t n = (wide + 7) / 8;
     case 1:    *c++ ^= *t1++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }