Commits

CarloWood committed ac84149

Bug fix and general fixups. Testsuite for transpose.

Added test_transpose.c to the testsuite.
Fixed a bug for non-square matrices of specific sizes where
uninitialized data was written to the excess bits of the
destination matrix of mzd_transpose.
Added a few asserts related to multiblock matrices.
A few minor documentation fixes and typos.

Comments (0)

Files changed (6)

 libm4ri_la_LDFLAGS = -release 0.0.20110501 -no-undefined
 libm4ri_la_LIBADD = -lm
 
-check_PROGRAMS=test_multiplication test_elimination test_trsm test_pls test_solve test_kernel test_random test_smallops
+check_PROGRAMS=test_multiplication test_elimination test_trsm test_pls test_solve test_kernel test_random test_smallops test_transpose
 test_multiplication_SOURCES=testsuite/test_multiplication.c
 test_multiplication_LDFLAGS=-lm4ri -lm
 test_multiplication_CFLAGS=-I$(srcdir)/src
 test_smallops_LDFLAGS=-lm4ri -lm
 test_smallops_CFLAGS=-I$(srcdir)/src
 
-TESTS = test_multiplication test_elimination test_trsm test_pls test_solve test_kernel test_random test_smallops
+test_transpose_SOURCES=testsuite/test_transpose.c
+test_transpose_LDFLAGS=-lm4ri -lm
+test_transpose_CFLAGS=-I$(srcdir)/src
+
+TESTS = test_multiplication test_elimination test_trsm test_pls test_solve test_kernel test_random test_smallops test_transpose

src/packedmatrix.c

   int i = n;
   do {
     t[0] = wks[0];
-    if (--i == 0)
+    if (--i == 0) {
+      t[1] = 0;
+      t[2] = 0;
+      t[3] = 0;
       break;
+    }
     t[1] = wks[rowstride_src];
-    if (--i == 0)
+    if (--i == 0) {
+      t[2] = 0;
+      t[3] = 0;
       break;
+    }
     t[2] = wks[2 * rowstride_src];
-    if (--i == 0)
+    if (--i == 0) {
+      t[3] = 0;
       break;
+    }
     t[3] = wks[3 * rowstride_src];
     if (--i == 0)
       break;
 
 mzd_t *_mzd_transpose(mzd_t *DST, mzd_t const *A) {
   assert(!mzd_is_windowed(DST) && !mzd_is_windowed(A));
+  // We assume that there fit at least 64 rows in a block, if
+  // that is the case then each block will contain a multiple
+  // of 64 rows, since blockrows is a power of 2.
+  assert(A->blockrows_log >= 6 && DST->blockrows_log >= 6);
 
   rci_t nrows = A->nrows;
   rci_t ncols = A->ncols;
 
   if (maxsize >= 64) {
 
+    int const multiple_blocks = (A->flags | DST->flags) & mzd_flag_multiple_blocks;
+    if (__M4RI_UNLIKELY(multiple_blocks)) {
+      assert(!multiple_blocks);
+      // FIXME: Implement function that supports matrices of more than one block.
+      //return _mzd_transpose_multiblock(DST, A);
+    }
+
     if (nrows >= 64) {
-
-    /*
-     * This is an interesting #if ...
-     * I recommend to investigate the number of instructions, and the clocks per instruction,
-     * as function of various sizes of the matrix (most likely especially the number of columns
-     * (the size of a row) will have influence; also always use multiples of 64 or even 128),
-     * for both cases below.
-     *
-     * To measure this run for example:
-     *
-     * ./bench_packedmatrix -m 10 -x 10 -p PAPI_TOT_INS,PAPI_L1_TCM,PAPI_L2_TCM mzd_transpose 32000 32000
-     * ./bench_packedmatrix -m 10 -x 100 -p PAPI_TOT_INS,PAPI_L1_TCM,PAPI_L2_TCM mzd_transpose 128 10240
-     * etc (increase -x for smaller sizes to get better accuracy).
-     *
-     * --Carlo Wood
-     */
+      /*
+       * This is an interesting #if ...
+       * I recommend to investigate the number of instructions, and the clocks per instruction,
+       * as function of various sizes of the matrix (most likely especially the number of columns
+       * (the size of a row) will have influence; also always use multiples of 64 or even 128),
+       * for both cases below.
+       *
+       * To measure this run for example:
+       *
+       * ./bench_packedmatrix -m 10 -x 10 -p PAPI_TOT_INS,PAPI_L1_TCM,PAPI_L2_TCM mzd_transpose 32000 32000
+       * ./bench_packedmatrix -m 10 -x 100 -p PAPI_TOT_INS,PAPI_L1_TCM,PAPI_L2_TCM mzd_transpose 128 10240
+       * etc (increase -x for smaller sizes to get better accuracy).
+       *
+       * --Carlo Wood
+       */
 #if 1
       int js = ncols & nrows & 64;	// True if the total number of whole 64x64 matrices is odd.
       wi_t const rowstride_64_dst = 64 * DST->rowstride;
       if (js) {
 	js = 1;
 	_mzd_copy_transpose_64x64(fwd, fws, DST->rowstride, A->rowstride);
-	if (nrows == 64 && ncols == 64) {
+	if ((nrows | ncols) == 64) {
 	  __M4RI_DD_MZD(DST);
 	  return DST;
 	}

src/packedmatrix.h

 #endif
 
 /**
- * Maximum number of bytes allocated for one mzd_t block.
+ * Maximum number of words allocated for one mzd_t block.
  *
  * \note This value must fit in an int, even though it's type is size_t.
  */
 
-#define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 30)
+#define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 27)
 
 /**
  * \brief Matrix multiplication block-ing dimension.
    * 1: Has non-zero excess.
    * 2: Is windowed, but has zero offset.
    * 3: Is windowed, but has zero excess.
-   * 4: Spans more than 1 block.
+   * 4: Is windowed, but owns the blocks allocations.
+   * 5: Spans more than 1 block.
    */
 
   uint8_t flags;

testsuite/Makefile.in

 	test_trsm test_pluq \
 	test_solve test_kernel \
 	test_random \
-	test_smallops
+	test_smallops \
+	test_transpose
 
 BENCH_PRGS = \
 	bench_elimination \

testsuite/bench_packedmatrix.c

   printf("\n");
   for (int n = 1; n < data_len; ++n)
   {
-    printf("%s (%f) per bit (devided by ", papi_event_name(papi_events[n - 1]), (double)data[n] / params.count);
+    printf("%s (%f) per bit (divided by ", papi_event_name(papi_events[n - 1]), (double)data[n] / params.count);
     print_complexity_human(&params, function_mapper[f].complexity_code);
     printf("): %f\n", data[n] / (params.count * cost));
   }

testsuite/test_transpose.c

+/*
+ * test_transpose.c
+ *
+ * Application to test functionality of mzd_transpose.
+ *
+ * Copyright (C) 2011  Carlo Wood  <carlo@alinoe.com>
+ * RSA-1024 0x624ACAD5 1997-01-26                    Sign & Encrypt
+ * Fingerprint16 = 32 EC A7 B6 AC DB 65 A6  F6 F6 55 DD 1C DC FF 61
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "m4ri.h"
+
+int test_size[18] = {
+  1, 3, 4, 7, 8, 11, 16, 17, 32, 40, 64, 80, 128, 160, 192, 240, 256, 512
+};
+
+int test_transpose(int i)
+{
+  int failure = 0;
+  rci_t m = test_size[i];
+  printf("transpose m: %4d, n: ", m);
+  for (int j = 0; j < 18 && !failure; ++j) {
+    rci_t n = test_size[j];
+    printf("%d", n);
+    if (j != 17)
+      printf(",");
+    int size = m * n;
+    int loop_size = MAX(64 * 64 / size, 2);
+    for (int i = 0; i < loop_size && !failure; ++i)
+    {
+      mzd_t* A = mzd_init(m, n);
+      mzd_t* B = mzd_init(m, n);
+      mzd_randomize(A);
+      mzd_randomize(B);
+      mzd_t* C = mzd_add(NULL, A, B);
+      mzd_t* AT = mzd_transpose(NULL, A);
+      mzd_t* BT = mzd_transpose(NULL, B);
+      mzd_t* CT = mzd_add(NULL, AT, BT);
+      mzd_t* CTT = mzd_transpose(NULL, CT);
+      if (!mzd_equal(C, CTT))
+	++failure;
+      mzd_free(A);
+      mzd_free(B);
+      mzd_free(C);
+      mzd_free(AT);
+      mzd_free(BT);
+      mzd_free(CT);
+      mzd_free(CTT);
+    }
+  }
+  printf("  ");
+  if (failure) {
+    printf("FAILED\n");
+  }
+  else
+    printf("passed\n");
+  return failure;
+}
+
+int main()
+{
+  int status = 0;
+
+  for (int i = 0; i < 18; ++i) {
+      status += test_transpose(i);
+  }
+
+  if (!status) {
+    printf("All tests passed.\n");
+  } else {
+    printf("TEST FAILED!\n");
+    return 1;
+  }
+
+  return 0;
+}