1. CarloWood
  2. M4RI

Commits

CarloWood  committed 9b8dcab

Add --enable-debug-dump.

When configured with --enable-debug-dump, print a trace
of (hash values of) output values and their function/location,
upon leaving any function that does something significant.

This can be used to quickly find the function that behaves
differently in the case that some patch breaks the testsuite.

  • Participants
  • Parent commits 2809e3f
  • Branches default

Comments (0)

Files changed (17)

File Makefile.am

View file
 
 lib_LTLIBRARIES = libm4ri.la
 
-libm4ri_la_SOURCES = src/brilliantrussian.c src/misc.c src/packedmatrix.c src/grayflex.c src/strassen.c src/permutation.c src/trsm.c src/pls.c src/solve.c src/pls_mmpf.c src/echelonform.c src/mmc.c
+libm4ri_la_SOURCES = src/brilliantrussian.c src/misc.c src/packedmatrix.c src/grayflex.c src/strassen.c src/permutation.c src/trsm.c src/pls.c src/solve.c src/pls_mmpf.c src/echelonform.c src/mmc.c src/debug_dump.c
 BUILT_SOURCES = src/m4ri_config.h
 
 pkgincludesubdir = $(includedir)/m4ri

File configure.ac

View file
 AC_SUBST(M4RI_HAVE_OPENMP)
 
 # Debugging support
-AC_ARG_ENABLE(debug, [  --enable-debug          Enable assert() statements for debugging.])
+AC_ARG_ENABLE([debug],
+	AS_HELP_STRING([--enable-debug], [Enable assert() statements for debugging.]))
 
 if test "x$enable_debug" = x"yes"; then
    DEBUG_FLAGS="-g"
    AC_DEFINE(NDEBUG,1,[Define whether debugging is enabled])
 fi
 
+AC_ARG_ENABLE([debug-dump],
+        AS_HELP_STRING([--enable-debug-dump], [Dump output at exit of every function.]))
+
+if test "x$enable_debug_dump" = "xyes"; then
+  M4RI_DEBUG_DUMP=1
+else
+  M4RI_DEBUG_DUMP=0
+fi
+AC_SUBST(M4RI_DEBUG_DUMP)
+
 # For the testsuite. Detect if PAPI is installed. See http://icl.cs.utk.edu/papi/ .
 
 if test -z "$m4ri_config_papi"; then

File src/brilliantrussian.c

View file
       }
     }
     if (found == 0) {
-      return j - c;
+      break;
     }
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(j - c);
   return j - c;
 }
 
       }
     }
     if (found == 0) {
-      return j - c;
+      break;
     }
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(j - c);
   return j - c;
 }
 
     }
     ++start_row;
   }
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_INT(k);
   return k;
 }
 
       dst[j] = src[j];
     }
   }
+  __M4RI_DD_MZD(A);
 }
 
 void mzd_make_table(mzd_t const *M, rci_t r, rci_t c, int k, mzd_t *T, rci_t *L)
     case 1:  *ti++ = (*m++ ^ *ti1++) & mask_end;
     }
   }
+  __M4RI_DD_MZD(T);
+  __M4RI_DD_RCI_ARRAY(L, twokay);
 }
 
 void mzd_process_rows(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, mzd_t const *T, rci_t const *L) {
         } while (--n > 0);
       }
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows2(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, mzd_t const *T0, rci_t const *L0, mzd_t const *T1, rci_t const *L1) {
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows3(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows4(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows5(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows6(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k, 
       } while (--n > 0);
     }
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 rci_t _mzd_echelonize_m4ri(mzd_t *A, int const full, int k, int heuristic, double const threshold) {
   mzd_free(T5);
   m4ri_mm_free(L5);
   mzd_free(U);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }
 
   mzd_free(T5);
   m4ri_mm_free(L5);
   mzd_free(U);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }
 
   
   mzd_free(big);
   
+  __M4RI_DD_MZD(answer);
   return answer;
 }
 
   mzd_free(T8);
 #endif
   m4ri_mm_free(buffer);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 //#undef __M4RI_M4RM_GRAY8
   m4ri_mm_free(L6);
   m4ri_mm_free(L7);
 #endif
+
+  __M4RI_DD_MZD(B);
 }

File src/debug_dump.c

View file
+/******************************************************************************
+*
+*            M4RI: Linear Algebra over GF(2)
+*
+*    Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
+*
+*  Distributed under the terms of the GNU General Public License (GPL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "packedmatrix.h"
+#include "permutation.h"
+
+#if __M4RI_DEBUG_DUMP
+
+/* Running count of dump lines emitted so far; printed as "Sequence#". */
+static unsigned long dd_sequence_number = 0;
+
+/**
+ * Print the common prefix of a dump line and advance the sequence counter.
+ *
+ * The prefix has the form "Sequence#: N; function @ file:line; " and is
+ * written to stdout without a trailing newline, so that the caller can
+ * append the value-specific part on the same line.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ */
+static void entry(char const* function, char const* file, int line)
+{
+  /* dd_sequence_number is unsigned long, so the matching conversion is %lu. */
+  printf("Sequence#: %lu; %s @ %s:%d; ", dd_sequence_number, function, file, line);
+  ++dd_sequence_number;
+}
+
+/**
+ * Fold a run of words into a single word by XOR-ing them together.
+ *
+ * \param rowptr Pointer to the first word.
+ * \param wide   Number of words to fold, starting at rowptr.
+ *
+ * \return The XOR of the wide words; 0 when wide is not positive.
+ */
+static word calculate_hash(word const* rowptr, wi_t wide)
+{
+  unsigned long long acc = 0;
+  word const* const stop = rowptr + wide;
+  word const* cur = rowptr;
+  while (cur < stop) {
+    acc ^= *cur;
+    ++cur;
+  }
+  return acc;
+}
+
+/**
+ * Rotate w to the left by shift bit positions, treating it as a word of
+ * m4ri_radix bits.
+ *
+ * The callers in this file pass an index reduced modulo m4ri_radix;
+ * this function assumes 0 <= shift < m4ri_radix.
+ *
+ * \param w     Word to rotate.
+ * \param shift Number of bit positions to rotate by.
+ *
+ * \return The rotated word.
+ */
+static inline word rotate_word(word w, int shift)
+{
+  /*
+   * The complementary right shift has to be derived from shift, not from
+   * the value w.  It is reduced modulo m4ri_radix so that shift == 0 does
+   * not shift by the full word width, which is undefined behavior in C.
+   */
+  return (w << shift) | (w >> ((m4ri_radix - shift) % m4ri_radix));
+}
+
+/**
+ * Dump a plain int value: prints the location prefix followed by
+ * "int: <value>" and a newline.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param i        Value to print.
+ */
+void m4ri_dd_int(char const* function, char const* file, int line, int i) {
+  entry(function, file, line); /* "Sequence#: ...; " prefix, no newline */
+  printf("int: %d\n", i);
+}
+
+/**
+ * Dump a row/column index: prints the location prefix followed by
+ * "rci: <value>" and a newline.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param rci      Index to print (formatted with %d).
+ */
+void m4ri_dd_rci(char const* function, char const* file, int line, rci_t rci) {
+  entry(function, file, line); /* "Sequence#: ...; " prefix, no newline */
+  printf("rci: %d\n", rci);
+}
+
+/**
+ * Dump a hash of an array of row/column indices.
+ *
+ * Each element is rotated by its position (modulo m4ri_radix) before being
+ * XOR-ed into the hash, so the result depends on element order.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param rciptr   Pointer to the first element.
+ * \param len      Number of elements to hash.
+ */
+void m4ri_dd_rci_array(char const* function, char const* file, int line, rci_t *rciptr, int len)
+{
+  unsigned long long digest = 0;
+  int idx = 0;
+
+  entry(function, file, line);
+  while (idx < len) {
+    digest ^= rotate_word(rciptr[idx], idx % m4ri_radix);
+    ++idx;
+  }
+  printf("rci array (size %d) hash: %llx\n", len, digest);
+}
+
+/**
+ * Dump a hash of a raw run of words.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param rowptr   Pointer to the first word.
+ * \param wide     Number of words to hash, starting at rowptr.
+ */
+void m4ri_dd_rawrow(char const* function, char const* file, int line, word const* rowptr, wi_t wide)
+{
+  unsigned long long digest;
+
+  entry(function, file, line);
+  digest = calculate_hash(rowptr, wide); /* XOR of all wide words */
+  printf("raw row (%d words) hash: %llx\n", wide, digest);
+}
+
+/**
+ * Dump a hash of one row of a matrix.
+ *
+ * The hash covers the M->width words starting at M->rows[row].
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param M        Matrix that contains the row.
+ * \param row      Index of the row to hash.
+ */
+void m4ri_dd_row(char const* function, char const* file, int line, mzd_t const* M, rci_t row)
+{
+  unsigned long long digest;
+
+  entry(function, file, line);
+  digest = calculate_hash(M->rows[row], M->width);
+  printf("row %d hash: %llx\n", row, digest);
+}
+
+/**
+ * Dump a hash of a whole matrix.
+ *
+ * For every row 0 .. M->nrows - 1 the M->width words of that row are XOR-ed
+ * together; the per-row value is rotated by the row index (modulo
+ * m4ri_radix) and XOR-ed into the matrix hash.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param M        Matrix to hash.
+ */
+void m4ri_dd_mzd(char const* function, char const* file, int line, mzd_t const* M)
+{
+  unsigned long long digest = 0;
+  rci_t row = 0;
+
+  entry(function, file, line);
+  while (row < M->nrows) {
+    word const row_hash = calculate_hash(M->rows[row], M->width);
+    digest ^= rotate_word(row_hash, row % m4ri_radix);
+    ++row;
+  }
+  printf("mzd hash: %llx\n", digest);
+}
+
+/**
+ * Dump a hash of a permutation.
+ *
+ * Each of the P->length entries of P->values is rotated by its position
+ * (modulo m4ri_radix) and XOR-ed into the hash.
+ *
+ * \param function Name of the function that is being left.
+ * \param file     Source file of the call site.
+ * \param line     Source line of the call site.
+ * \param P        Permutation to hash.
+ */
+void m4ri_dd_mzp(char const* function, char const* file, int line, mzp_t const* P)
+{
+  unsigned long long digest = 0;
+  rci_t pos = 0;
+
+  entry(function, file, line);
+  while (pos < P->length) {
+    digest ^= rotate_word(P->values[pos], pos % m4ri_radix);
+    ++pos;
+  }
+  printf("mzp hash: %llx\n", digest);
+}
+
+#endif

File src/debug_dump.h

View file
+/**
+ * \file debug_dump.h
+ *
+ * \brief Debug utility
+ * 
+ * \author Carlo Wood <carlo@alinoe.com>
+ *
+ * To enable dumping of output per function, configure the library with --enable-debug-dump.
+ */
+/******************************************************************************
+*
+*                 M4RI: Linear Algebra over GF(2)
+*
+*    Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
+*
+*  Distributed under the terms of the GNU General Public License (GPL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
+#ifndef M4RI_DOXYGEN
+
+#if __M4RI_DEBUG_DUMP
+
+/* Forward declarations: only pointers to these structs are needed here. */
+struct mzd_t;
+struct mzp_t;
+
+/*
+ * Dump functions.  Every function takes the name of the calling function
+ * and the file and line of the call site, followed by the value to dump.
+ *
+ * NOTE(review): rci_t, word and wi_t are not declared in this header; it
+ * presumably relies on the including file to provide them first -- confirm.
+ */
+void m4ri_dd_int(char const* function, char const* file, int line, int i);
+void m4ri_dd_rci(char const* function, char const* file, int line, rci_t rci);
+void m4ri_dd_rci_array(char const* function, char const* file, int line, rci_t *rciptr, int len);
+void m4ri_dd_rawrow(char const* function, char const* file, int line, word const* rowptr, wi_t wide);
+void m4ri_dd_row(char const* function, char const* file, int line, struct mzd_t const* M, rci_t row);
+void m4ri_dd_mzd(char const* function, char const* file, int line, struct mzd_t const* M);
+void m4ri_dd_mzp(char const* function, char const* file, int line, struct mzp_t const* P);
+
+/*
+ * Convenience wrappers that pass the call site (__FUNCTION__, __FILE__,
+ * __LINE__) to the corresponding dump function.
+ */
+#define __M4RI_DD_INT(i) m4ri_dd_int(__FUNCTION__, __FILE__, __LINE__, i)
+#define __M4RI_DD_RCI(rci) m4ri_dd_rci(__FUNCTION__, __FILE__, __LINE__, rci)
+#define __M4RI_DD_RCI_ARRAY(rciptr, len) m4ri_dd_rci_array(__FUNCTION__, __FILE__, __LINE__, rciptr, len)
+#define __M4RI_DD_RAWROW(rowptr, wide) m4ri_dd_rawrow(__FUNCTION__, __FILE__, __LINE__, rowptr, wide)
+#define __M4RI_DD_ROW(M, row) m4ri_dd_row(__FUNCTION__, __FILE__, __LINE__, M, row)
+#define __M4RI_DD_MZD(M) m4ri_dd_mzd(__FUNCTION__, __FILE__, __LINE__, M)
+#define __M4RI_DD_MZP(P) m4ri_dd_mzp(__FUNCTION__, __FILE__, __LINE__, P)
+
+#else // __M4RI_DEBUG_DUMP
+
+/* Debug dumping is disabled: every macro expands to nothing. */
+#define __M4RI_DD_INT(i)
+#define __M4RI_DD_RCI(rci)
+#define __M4RI_DD_RCI_ARRAY(rciptr, len)
+#define __M4RI_DD_RAWROW(rowptr, wide)
+#define __M4RI_DD_ROW(M, row)
+#define __M4RI_DD_MZD(M)
+#define __M4RI_DD_MZP(P)
+
+#endif // __M4RI_DEBUG_DUMP
+
+#endif // M4RI_DOXYGEN

File src/echelonform.c

View file
   
   mzp_free(P);
   mzp_free(Q);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_RCI(r);
   return r;
 }

File src/m4ri_config.h.in

View file
 #define __M4RI_HAVE_POSIX_MEMALIGN	@M4RI_HAVE_POSIX_MEMALIGN@
 #define __M4RI_HAVE_SSE2		@M4RI_HAVE_SSE2@
 #define __M4RI_HAVE_OPENMP		@M4RI_HAVE_OPENMP@
+#define __M4RI_DEBUG_DUMP		@M4RI_DEBUG_DUMP@
 
 // Helper macros.
 #define __M4RI_USE_MM_MALLOC		(__M4RI_HAVE_MM_MALLOC && __M4RI_HAVE_SSE2)

File src/packedmatrix.c

View file
     window->rows[i] = m->rows[lowr + i] + offset;
   }
   
+  __M4RI_DD_MZD(window);
   return window;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(M);
+  __M4RI_DD_RCI(pivots);
   return pivots;
 }
 
       DST->rows[64+k+j][1] ^= t[3];			// D
     }
   }
+
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
         mzd_write_bit(DST, j, i, mzd_read_bit(A, i, j));
       }
     }
+    __M4RI_DD_MZD(DST);
     return DST;
   }
 
       collect = 0;
     }
   }
+
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
   mzd_free_window(AT); mzd_free_window(CT);
   mzd_free_window(BT); mzd_free_window(DT);
   
+  __M4RI_DD_MZD(DST);
   return DST;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     for(rci_t j = 0; j < n; ++j)
       if (mzd_read_bit(v,i,j))
         mzd_combine(C,i,0, C,i,0, A,j,0);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       A->rows[i][width] ^= (A->rows[i][width] ^ m4ri_random_word()) & mask_end;
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 void mzd_set_ui( mzd_t *A, unsigned int value) {
     }
   }
 
-  if(value % 2 == 0)
+  if(value % 2 == 0) {
+    __M4RI_DD_MZD(A);
     return;
+  }
 
   rci_t const stop = MIN(A->nrows, A->ncols);
   for (rci_t i = 0; i < stop; ++i) {
     mzd_write_bit(A, i, i, 1);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 int mzd_equal(mzd_t const *A, mzd_t const *B) {
       m4ri_die("mzd_copy: completely unaligned copy not implemented yet.");
     }
   }
+
+  __M4RI_DD_MZD(N);
   return N;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       dst_truerow[j] = src_truerow[j]; 
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
   INV = mzd_submatrix(INV, H, 0, A->ncols, A->nrows, 2 * A->ncols);
 
   mzd_free(H);
+
+  __M4RI_DD_MZD(INV);
   return INV;
 }
 
     for(rci_t i = 0; i < nrows; ++i) {
       mzd_combine_weird(C,i,0, A,i,0, B,i,0);
     }
+    __M4RI_DD_MZD(C);
     return C;
   }
 
       mzd_combine_even(C,i,0, A,i,0, B,i,0);
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       }
     }
   }
+  __M4RI_DD_MZD(S);
   return S;
 }
 
       register word x = ((b >> a_bit) ^ (b >> b_bit)) & m4ri_one; // XOR temporary
       *base = b ^ ((x << a_bit) | (x << b_bit));
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
     }
   }
 
+  __M4RI_DD_MZD(M);
 }
 
-
 int mzd_is_zero(mzd_t const *A) {
   /* Could be improved: stopping as the first non zero value is found (status!=0) */
   rci_t const mb = A->nrows;
     b[c / m4ri_radix] &= __M4RI_LEFT_BITMASK(m4ri_radix - rest);
     b[c / m4ri_radix] |= temp;
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j) {
   } else {
     b[0] = (b[0] & ~mask_begin) | (a[0] & mask_begin & mask_end) | (b[0] & ~mask_end);
   }
+
+  __M4RI_DD_ROW(B, i);
 }
 
 
   for (wi_t i = startblock + 1; i < M->width; ++i) {
     M->rows[row][i] = 0;
   }
+
+  __M4RI_DD_ROW(M, row);
 }
 
 
-int mzd_find_pivot(mzd_t const *A, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c) { 
+int mzd_find_pivot(mzd_t const *A, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c) {
   assert(A->offset == 0);
   rci_t const nrows = A->nrows;
   rci_t const ncols = A->ncols;
             break;
           }
         }
+	__M4RI_DD_RCI(*r);
+	__M4RI_DD_RCI(*c);
+	__M4RI_DD_INT(1);
         return 1;
       }
     }
           break;
         }
       }
+      __M4RI_DD_RCI(*r);
+      __M4RI_DD_RCI(*c);
+      __M4RI_DD_INT(1);
       return 1;
     }
     /* handle complete words */
             break;
           }
         }
+	__M4RI_DD_RCI(*r);
+	__M4RI_DD_RCI(*c);
+	__M4RI_DD_INT(1);
         return 1;
       }
     }
           break;
         }
       }
+      __M4RI_DD_RCI(*r);
+      __M4RI_DD_RCI(*c);
+      __M4RI_DD_INT(1);
       return 1;
     }
   }
+  __M4RI_DD_RCI(*r);
+  __M4RI_DD_RCI(*c);
+  __M4RI_DD_INT(0);
   return 0;
 }
 
     for (wi_t j = 1; j < end; ++j)
       tmp |= row[j];
     tmp |= row[end] & mask_end;
-    if(tmp)
+    if(tmp) {
+      __M4RI_DD_INT(i + 1);
       return i + 1;
+    }
   }
+  __M4RI_DD_INT(0);
   return 0;
 }

File src/packedmatrix.h

View file
 #endif
 
 #include "misc.h"
+#include "debug_dump.h"
 
 #if __M4RI_HAVE_SSE2
 /**
  * The most fundamental data type in this library.
  */
 
-typedef struct {
+typedef struct mzd_t {
   /**
    * Contains pointers to the actual blocks of memory containing the
    * values packed into words of size m4ri_radix.
   tmp = (a[width] ^ b[width]) & mask_end;
   a[width] ^= tmp;
   b[width] ^= tmp;
+
+  __M4RI_DD_ROW(M, rowa);
+  __M4RI_DD_ROW(M, rowb);
 }
 
 /**
     a[0] ^= tmp;
     b[0] ^= tmp;
   }
+
+  __M4RI_DD_ROW(M, rowa);
+  __M4RI_DD_ROW(M, rowb);
 }
 
 /**
       x |= x << coldiff;			/* Duplicate this bit at both column positions. */
       *vp = v ^ x;				/* Swap column bits and store result. */
     }
+    __M4RI_DD_MZD(M);
     return;
   }
 
     *(base + a_word) = a;
     *(base + b_word) = b;
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 /**
    * that we last accessed, in the previous loop.
    */
   dst[i - 1] ^= src[i - 1] & ~mask_end;
+
+  __M4RI_DD_ROW(M, dstrow);
 }
 
 /**
     mzd_clear_bits(C, c_row, i, (C->ncols - i));
     mzd_xor_bits(C, c_row, i, (C->ncols - i), tmp);
   }
-  return;
+
+  __M4RI_DD_MZD(C);
 }
 
 /**
   }
 
   *a ^= *b & __M4RI_LEFT_BITMASK(A->ncols%m4ri_radix);
-  return;
+
+  __M4RI_DD_MZD(A);
 }
 
 
     }
   }
   *c ^= ((*a ^ *b ^ *c) & __M4RI_LEFT_BITMASK(C->ncols%m4ri_radix));
-  return;
+
+  __M4RI_DD_MZD(C);
 }
 
 

File src/permutation.c

View file
   mzp_t *window = (mzp_t *)m4ri_mm_malloc(sizeof(mzp_t));
   window->values = P->values + begin;
   window->length = end - begin;
+  __M4RI_DD_MZP(window);
   return window;
 }
 
       Arow[a_word] |= value;
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
+
 /**
  * Implements both apply_p_right and apply_p_right_trans.
  */
   m4ri_mm_free(permutation);
   m4ri_mm_free(write_mask);
   mzd_free(B);
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_apply_p_right_trans(mzd_t *A, mzp_t const *P) {
 /*     assert(P->values[i] >= i); */
 /*     mzd_col_swap(A, i, P->values[i]); */
 /*   } */
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_apply_p_right(mzd_t *A, mzp_t const *P) {
 /*     assert(P->values[i] >= i); */
 /*     mzd_col_swap(A, i, P->values[i]); */
 /*   } */
+
+  __M4RI_DD_MZD(A);
 }
 
 
       mzd_col_swap_in_rows(A, i, P->values[i], r, MIN(row_bound, i));
     }
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 void _mzd_compress_l(mzd_t *A, rci_t r1, rci_t n1, rci_t r2) {
   }
  
 #endif
+
+  __M4RI_DD_MZD(A);
 }
 

File src/permutation.h

View file
  * \brief Permutations.
  */
 
-typedef struct {
+typedef struct mzp_t {
   /**
    * The swap operations in LAPACK format.
    */

File src/pls.c

View file
     mzd_free_window(A10);
     mzd_free_window(A11);
 
+    __M4RI_DD_MZD(A);
+    __M4RI_DD_MZP(P);
+    __M4RI_DD_MZP(Q);
+    __M4RI_DD_RCI(r1 + r2);
     return r1 + r2;
   }
 }
 
-
 rci_t _mzd_pluq_naive(mzd_t *A, mzp_t *P, mzp_t *Q)  {
   rci_t curr_pos = 0;
   for (curr_pos = 0; curr_pos < A->ncols; ) {
     P->values[i] = i;
   for (rci_t i = curr_pos; i < A->ncols; ++i)
     Q->values[i] = i;
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(curr_pos);
   return curr_pos;
 }
  
     }
   }
 
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(row_pos);
   return row_pos;
 }
-
-

File src/pls_mmpf.c

View file
   A->ncols = ncols;
   A->width = width;
 
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_INT(curr_pos);
   return curr_pos;
 }
 
     word const correction = __M4RI_CONVERT_TO_WORD(m4ri_codebook[k]->ord[i]);
     mzd_xor_bits(T, i,c, k, correction);
   }
+
+  __M4RI_DD_MZD(T);
+  __M4RI_DD_RCI_ARRAY(Le, twokay);
+  __M4RI_DD_RCI_ARRAY(Lm, twokay);
 }
 
 void mzd_process_rows2_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine2(m0,t0,t1,wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows3_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine3(m0,t0,t1,t2,wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void mzd_process_rows4_pls(mzd_t *M, rci_t startrow, rci_t stoprow, rci_t startcol, int k,
 
     _mzd_combine4(m0, t0, t1, t2, t3, wide);
   }
+
+  __M4RI_DD_MZD(M);
 }
 
 void _mzd_finish_pls_done_pivots(mzd_t *A, mzp_t const *P, rci_t const start_row, rci_t const start_col, wi_t const addblock, int const k) {
       }
     }
   }
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
 }
 
 void _mzd_finish_pls_done_rest1(mzd_t *A,
     word *t = A->rows[i] + addblock;
     _mzd_combine(t, s0, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine2(t, s0, s1, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine3(t, s0, s1, s2, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 
     word *t = A->rows[i] + addblock;
     _mzd_combine4(t, s0, s1, s2, s3, wide);
   }
+
+  __M4RI_DD_MZD(A);
 }
 
 /* extract U from A for table creation */
   for(rci_t i = 0; i < k; ++i)
     for(rci_t j = startcol; j < c + i; ++j) 
       mzd_write_bit(U, i, j,  0);
+
+  __M4RI_DD_MZD(U);
   return U;
 }
 
   m4ri_mm_free(E2);  m4ri_mm_free(M2);
   m4ri_mm_free(E3);  m4ri_mm_free(M3);
   m4ri_mm_free(done);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZP(P);
+  __M4RI_DD_MZP(Q);
+  __M4RI_DD_RCI(curr_row);
   return curr_row;
 }
 

File src/solve.c

View file
   mzd_apply_p_left_trans(B, Q);
 
   /* P L U Q B5 = B1 */
+  __M4RI_DD_MZD(B); 
+  __M4RI_DD_INT(retval);
   return retval;
 }
 
   
   mzp_free(P);
   mzp_free(Q);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZD(B);
   return retval;
 }
 
   if (r == A->ncols) {
     mzp_free(P);
     mzp_free(Q);
+    __M4RI_DD_MZD(A);
     return NULL;
   }
 
   mzd_free_window(RU);
   mzd_free_window(U);
   mzd_free_window(B);
+
+  __M4RI_DD_MZD(A);
+  __M4RI_DD_MZD(R);
   return R;
 }

File src/strassen.c

View file
   mzd_free(X0);
   mzd_free(X1);
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     mzd_free_window(C_bulk);
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
   mzd_free_window(C00); mzd_free_window(C01);
   mzd_free_window(C10); mzd_free_window(C11);
   
+  __M4RI_DD_MZD(C);
   return C;
 }
 #endif
   mzd_free(X1);
   mzd_free(X2);
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     mzd_free_window(C_bulk);
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     }
   }
 
+  __M4RI_DD_MZD(C);
   return C;
 }
 
       mzd_free_window ((mzd_t*)B0); mzd_free_window ((mzd_t*)B1);
     }
   }
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     *c ^= par;//m4ri_parity64(parity);
   }
   mzd_free (BT);
+
+  __M4RI_DD_MZD(C);
   return C;
 }
 
     m4ri_die("mzd_addmul: C (%d x %d) has wrong dimensions, expected (%d x %d)\n",
 	     C->nrows, C->ncols, A->nrows, B->ncols);
   }
-  if(A->nrows == 0 || A->ncols == 0 || B->ncols == 0)
+  if(A->nrows == 0 || A->ncols == 0 || B->ncols == 0) {
+    __M4RI_DD_MZD(C);
     return C;
+  }
 
   C = _mzd_addmul(C, A, B, cutoff);
+  __M4RI_DD_MZD(C);
   return C;
 }
-
-

File src/trsm.c

View file
   mzd_free_window((mzd_t*)U00);
   mzd_free_window((mzd_t*)U01);
   mzd_free_window((mzd_t*)U11);
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_weird(mzd_t const *U, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i + offset);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_even(mzd_t const *U, mzd_t *B, const int cutoff) {
   mzd_free_window((mzd_t*)U00);
   mzd_free_window((mzd_t*)U01);
   mzd_free_window((mzd_t*)U11);
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_right_base(mzd_t const *U, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_weird(mzd_t const *L, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep ][0], i + offset);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_even(mzd_t const *L, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_right_base(mzd_t const *L, mzd_t *B) {
       if(__M4RI_GET_BIT(dotprod, babystep))
 	__M4RI_FLIP_BIT(B->rows[giantstep + babystep][0], i);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_left_weird(mzd_t const *L, mzd_t *B) {
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_lower_left_even(mzd_t const *L, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)L10);
     mzd_free_window((mzd_t*)L11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 /*****************
     mzd_free_window((mzd_t*)U01);
     mzd_free_window((mzd_t*)U11);
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_left_weird (mzd_t const *U, mzd_t *B) {
       }
     }
   }
+
+  __M4RI_DD_MZD(B);
 }
 
 void _mzd_trsm_upper_left_even(mzd_t const *U, mzd_t *B, const int cutoff) {
     mzd_free_window((mzd_t*)U01);
     mzd_free_window((mzd_t*)U11);
   }
+
+  __M4RI_DD_MZD(B);
 }

File src/xor.h

View file
  */
 
 static inline void _mzd_combine8(word *c, word const *t1, word const *t2, word const *t3, word const *t4,
-                                 word const *t5, word const *t6, word const *t7, word const *t8, wi_t wide) {
+                                 word const *t5, word const *t6, word const *t7, word const *t8, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t8 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
   for(wi_t i = 0; i < wide; ++i) {
     c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] ^ t5[i] ^ t6[i] ^ t7[i] ^ t8[i];
   }
+
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine4(word *c, word const *t1, word const *t2, word const *t3, word const *t4, wi_t wide) {
+static inline void _mzd_combine4(word *c, word const *t1, word const *t2, word const *t3, word const *t4, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t4 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t4 = (word*)__t4;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++ ^ *t3++ ^ *t4++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine3(word *c, word const *t1, word const *t2, word const *t3, wi_t wide) {
+static inline void _mzd_combine3(word *c, word const *t1, word const *t2, word const *t3, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t3 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t3 = (word*)__t3;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++ ^ *t3++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 
  *
  */
 
-static inline void _mzd_combine2(word *c, word const *t1, word const *t2, wi_t wide) {
+static inline void _mzd_combine2(word *c, word const *t1, word const *t2, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming t1 ... t2 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     t2 = (word*)__t2;
     wide = ((sizeof(word) * wide) % 16) / sizeof(word);
   }
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
     case 1:    *c++ ^= *t1++ ^ *t2++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }
 
 /**
  *
  */
 
-static inline void _mzd_combine(word *c, word const *t1, wi_t wide) {
+static inline void _mzd_combine(word *c, word const *t1, wi_t wide_in) {
+  wi_t wide = wide_in;
 #if __M4RI_HAVE_SSE2
   /* assuming c, t1 are alligned the same way */
 
   t1 = (word*)__t1;
   wide = ((sizeof(word) * wide) % 16) / sizeof(word);
 
-  if(!wide)
+  if(!wide) {
+    __M4RI_DD_RAWROW(c, wide_in);
     return;
+  }
 #endif // __M4RI_HAVE_SSE2
 
   wi_t n = (wide + 7) / 8;
     case 1:    *c++ ^= *t1++;
     } while (--n > 0);
   }
+  __M4RI_DD_RAWROW(c, wide_in);
 }