Commits

CarloWood  committed e252ff9

Do not install or include config.h in header files.

This patch introduces src/m4ri_config.h.in, from which
src/m4ri_config.h is generated during configure, which
subsequently is installed instead of config.h and included
by other headers. This avoids having the whole list
of macros defined in config.h pollute the macro namespace
for users of the library.

Header files now use __M4RI_ prefixed versions of
HAVE_SSE2, HAVE_MM_MALLOC, HAVE_POSIX_MEMALIGN and HAVE_OPENMP,
although rather than undefining HAVE_MM_MALLOC when HAVE_SSE2
isn't set, we use two helper macros: __M4RI_USE_MM_MALLOC
and __M4RI_USE_POSIX_MEMALIGN (the idea being, as before,
that without SSE2, alignment is not needed).

Also include testsuite/testing.h in the dist tar-ball,
because otherwise 'make check' is broken for tar-ball
releases.

Renamed SSE2_CUTOFF --> __M4RI_SSE2_CUTOFF because it's
also visible / used in a header.

  • Participants
  • Parent commits c30c32c

Comments (0)

Files changed (8)

 lib_LTLIBRARIES = libm4ri.la
 
 libm4ri_la_SOURCES = src/brilliantrussian.c src/misc.c src/packedmatrix.c src/grayflex.c src/strassen.c src/permutation.c src/trsm.c src/pls.c src/solve.c src/pls_mmpf.c src/echelonform.c src/mmc.c
+BUILT_SOURCES = src/m4ri_config.h
 
 pkgincludesubdir = $(includedir)/m4ri
-pkgincludesub_HEADERS = src/m4ri.h src/brilliantrussian.h src/misc.h src/packedmatrix.h src/grayflex.h src/strassen.h src/parity.h src/permutation.h src/config.h src/trsm.h src/pls.h src/solve.h src/pls_mmpf.h src/echelonform.h src/xor.h src/mmc.h
+pkgincludesub_HEADERS = src/m4ri.h src/brilliantrussian.h src/misc.h src/packedmatrix.h src/grayflex.h src/strassen.h src/parity.h src/permutation.h src/trsm.h src/pls.h src/solve.h src/pls_mmpf.h src/echelonform.h src/xor.h src/mmc.h
+nodist_pkgincludesub_HEADERS = src/m4ri_config.h
 
 #libm4ri_la_LDFLAGS = -version-info 0:0:0
 libm4ri_la_LDFLAGS = -release 0.0.20110501 -no-undefined
 test_random_LDFLAGS=-lm4ri -lm
 test_random_CFLAGS=-I$(srcdir)/src
 
-test_smallops_SOURCES=testsuite/test_smallops.c testsuite/testing.c
+test_smallops_SOURCES=testsuite/test_smallops.c testsuite/testing.c testsuite/testing.h
 test_smallops_LDFLAGS=-lm4ri -lm
 test_smallops_CFLAGS=-I$(srcdir)/src
 

File configure.ac

 
 AC_CONFIG_HEADERS(src/config.h)
 
-m4_include([m4/ax_gcc_x86_cpuid.m4])
-m4_include([m4/ax_ext.m4])dnl
-m4_include([m4/ax_cpu_vendor.m4])dnl
-m4_include([m4/ax_cache_size.m4])dnl
-m4_include([m4/ax_cache_size_tune.m4])dnl
-m4_include([m4/ax_check_compiler_flags.m4])dnl
-m4_include([m4/ax_openmp.m4])dnl
-
 dnl Check if a C++ compiler was specified. If so, assume we want to wrap word in a C++ class.
 AC_EGREP_CPP(YES, [
 #ifdef __cplusplus
           fi
    esac
 ])
+if test x"$ax_cv_have_sse2_ext" = x"yes"; then
+  M4RI_HAVE_SSE2=1
+else
+  M4RI_HAVE_SSE2=0
+fi
+AC_SUBST(M4RI_HAVE_SSE2)
 
 AC_ARG_WITH(papi,
     AS_HELP_STRING([--with-papi@<:@=PATH@:>@], [The PAPI install prefix, if configure can't find it.]),
     [m4ri_config_papi=$withval])
 
 AC_CHECK_HEADER([mm_malloc.h],AC_DEFINE(HAVE_MM_MALLOC,,[Support aligned allocations]),)
+if test "$ac_cv_header_mm_malloc_h" = "yes"; then
+  M4RI_HAVE_MM_MALLOC=1
+else
+  M4RI_HAVE_MM_MALLOC=0
+fi
+AC_SUBST(M4RI_HAVE_MM_MALLOC)
 
 # Correctly working posix_memalign
 AX_FUNC_POSIX_MEMALIGN
+if test "$ax_cv_func_posix_memalign_works" = "yes"; then
+  M4RI_HAVE_POSIX_MEMALIGN=1
+else
+  M4RI_HAVE_POSIX_MEMALIGN=0
+fi
+AC_SUBST(M4RI_HAVE_POSIX_MEMALIGN)
 
 # OpenMP support
 AC_ARG_ENABLE([openmp],
    AX_OPENMP()
 ])
 AC_SUBST(OPENMP_CFLAGS)
+if test -n "$OPENMP_CFLAGS"; then
+  M4RI_HAVE_OPENMP=1
+else
+  M4RI_HAVE_OPENMP=0
+fi
+AC_SUBST(M4RI_HAVE_OPENMP)
 
 # Debugging support
 AC_ARG_ENABLE(debug, [  --enable-debug          Enable assert() statements for debugging.])
 
 AC_PROG_MAKE_SET
 
-AC_CONFIG_FILES([Makefile testsuite/Makefile])
+AC_CONFIG_FILES([Makefile testsuite/Makefile src/m4ri_config.h])
 AC_OUTPUT
 

File src/m4ri_config.h.in

+#ifndef M4RI_M4RI_CONFIG_H
+#define M4RI_M4RI_CONFIG_H
+
+// Defines determined during configuration of m4ri.
+#define __M4RI_HAVE_MM_MALLOC		@M4RI_HAVE_MM_MALLOC@
+#define __M4RI_HAVE_POSIX_MEMALIGN	@M4RI_HAVE_POSIX_MEMALIGN@
+#define __M4RI_HAVE_SSE2		@M4RI_HAVE_SSE2@
+#define __M4RI_HAVE_OPENMP		@M4RI_HAVE_OPENMP@
+
+// Helper macros.
+#define __M4RI_USE_MM_MALLOC		(__M4RI_HAVE_MM_MALLOC && __M4RI_HAVE_SSE2)
+#define __M4RI_USE_POSIX_MEMALIGN	(__M4RI_HAVE_POSIX_MEMALIGN && __M4RI_HAVE_SSE2)
+
+#endif // M4RI_M4RI_CONFIG_H
 #include <windows.h>
 #endif
 
-#ifndef HAVE_SSE2
-#undef HAVE_MM_MALLOC
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
 *
 ********************************************************************/
 
-#include "config.h"
+#include "m4ri_config.h"
 
-#ifndef HAVE_SSE2
-#undef HAVE_MM_MALLOC
-#endif
-
-#ifdef HAVE_MM_MALLOC
+#if __M4RI_USE_MM_MALLOC
 #include <mm_malloc.h>
 #endif
 
 
 static inline void *m4ri_mm_calloc(size_t count, size_t size) {
   void *newthing;
-#ifdef HAVE_OPENMP
+#if __M4RI_HAVE_OPENMP
 #pragma omp critical
 {
 #endif
 
-#ifdef HAVE_MM_MALLOC
+#if __M4RI_USE_MM_MALLOC
   newthing = _mm_malloc(count * size, 16);
-#elif HAVE_POSIX_MEMALIGN
+#elif __M4RI_USE_POSIX_MEMALIGN
   int error = posix_memalign(&newthing, 16, count * size);
   if (error) newthing = NULL;
 #else
   newthing = calloc(count, size);
 #endif
 
-#ifdef HAVE_OPENMP
+#if __M4RI_HAVE_OPENMP
  }
 #endif
 
     m4ri_die("m4ri_mm_calloc: calloc returned NULL\n");
     return NULL; /* unreachable. */
   }
-#if defined(HAVE_MM_MALLOC) || defined(HAVE_POSIX_MEMALIGN)
+#if __M4RI_USE_MM_MALLOC || __M4RI_USE_POSIX_MEMALIGN
   char *b = (char*)newthing;
   memset(b, 0, count * size);
 #endif
 
 static inline void *m4ri_mm_malloc(size_t size) {
   void *newthing;
-#ifdef HAVE_OPENMP
+#if __M4RI_HAVE_OPENMP
 #pragma omp critical
 {
 #endif
 
-#ifdef HAVE_MM_MALLOC
+#if __M4RI_USE_MM_MALLOC
   newthing = _mm_malloc(size, 16);
-#elif HAVE_POSIX_MEMALIGN
+#elif __M4RI_USE_POSIX_MEMALIGN
   int error = posix_memalign(&newthing, 16, size);
   if (error) newthing = NULL;
 #else
   newthing = malloc( size );
 #endif  
-#ifdef HAVE_OPENMP
+#if __M4RI_HAVE_OPENMP
  }
 #endif
   if (newthing==NULL && (size>0)) {
 
 /* void m4ri_mm_free(void *condemned, ...); */
 static inline void m4ri_mm_free(void *condemned, ...) { 
-#ifdef HAVE_MM_MALLOC
+#if __M4RI_USE_MM_MALLOC
   _mm_free(condemned); 
 #else
   free(condemned);

File src/packedmatrix.h

 *
 ********************************************************************/
 
+#include "m4ri_config.h"
+
 #include <math.h>
 #include <assert.h>
 #include <stdio.h>
 
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
 #include <emmintrin.h>
 #endif
 
 #include "misc.h"
 
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
 /**
  * \brief SSE2 cutoff in words.
  *
  * used.
  */
 
-#define SSE2_CUTOFF 10
+#define __M4RI_SSE2_CUTOFF 10
 #endif
 
 /**
   *dst++ ^= *src++ & mask_begin;
   --wide;
 
-#ifdef HAVE_SSE2 
-  int not_aligned = __M4RI_ALIGNMENT(src,16) != 0;		/* 0: Aligned, 1: Not aligned */
+#if __M4RI_HAVE_SSE2 
+  int not_aligned = __M4RI_ALIGNMENT(src,16) != 0;	/* 0: Aligned, 1: Not aligned */
   if (wide > not_aligned + 1)				/* Speed up for small matrices */
   {
     if (not_aligned) {
   word *a = A->rows[a_row] + a_startblock;
   word *b = B->rows[b_row] + b_startblock;
   
-#ifdef HAVE_SSE2
-  if(wide > SSE2_CUTOFF) {
+#if __M4RI_HAVE_SSE2
+  if(wide > __M4RI_SSE2_CUTOFF) {
     /** check alignments **/
     if (__M4RI_ALIGNMENT(a,16)) {
       *a++ ^= *b++;
       wide = ((sizeof(word) * wide) % 16) / sizeof(word);
     }
   }
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
 
   if (wide > 0) {
     wi_t n = (wide + 7) / 8;
   /*     c[i] = b[i]; */
   /*   } */
   /* } else { */
-#ifdef HAVE_SSE2
-  if(wide > SSE2_CUTOFF) {
+#if __M4RI_HAVE_SSE2
+  if(wide > __M4RI_SSE2_CUTOFF) {
     /** check alignments **/
     if (__M4RI_ALIGNMENT(a,16)) {
       *c++ = *b++ ^ *a++;
       wide = ((sizeof(word) * wide) % 16) / sizeof(word);
     }
   }
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
 
   if (wide > 0) {
     wi_t n = (wide + 7) / 8;
 #endif
 
   if (ncols <= m4ri_radix || A->width * A->nrows <= __M4RI_PLS_CUTOFF) {
-/*   if(ncols <= PLUQ_CUTOFF) { */
+/*   if(ncols <= __M4RI_PLUQ_CUTOFF) { */
     /* this improves data locality and runtime considerably */
     mzd_t *Abar = mzd_copy(NULL, A);
     rci_t r = _mzd_pls_mmpf(Abar, P, Q, 0);
 #ifndef M4RI_XOR_H
 #define M4RI_XOR_H
 
-#ifdef HAVE_SSE2
-#include <emmintrin.h>
-#endif
-
-#include "misc.h"
-
  /*******************************************************************
  *
  *                 M4RI:  Linear Algebra over GF(2)
  *
  ********************************************************************/
 
+#include "m4ri_config.h"
+
+#if __M4RI_HAVE_SSE2
+#include <emmintrin.h>
+#endif
+
+#include "misc.h"
+
 /**
  * Compute c[i] += t1[i] + t2[i] + t3[i] + t4[i] + t5[i] + t6[i] + t7[i] + t8[i] for 0 <= i < wide
  *
 
 static inline void _mzd_combine8(word *c, word const *t1, word const *t2, word const *t3, word const *t4,
                                  word const *t5, word const *t6, word const *t7, word const *t8, wi_t wide) {
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
   /* assuming t1 ... t8 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     __m128i *__c = (__m128i*)c;
  */
 
 static inline void _mzd_combine4(word *c, word const *t1, word const *t2, word const *t3, word const *t4, wi_t wide) {
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
   /* assuming t1 ... t4 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     __m128i *__c = (__m128i*)c;
   }
   if(!wide)
     return;
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
   case 0: do { *c++ ^= *t1++ ^ *t2++ ^ *t3++ ^ *t4++;
  */
 
 static inline void _mzd_combine3(word *c, word const *t1, word const *t2, word const *t3, wi_t wide) {
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
   /* assuming t1 ... t3 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     __m128i *__c = (__m128i*)c;
   }
   if(!wide)
     return;
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
   case 0: do { *c++ ^= *t1++ ^ *t2++ ^ *t3++;
  */
 
 static inline void _mzd_combine2(word *c, word const *t1, word const *t2, wi_t wide) {
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
   /* assuming t1 ... t2 are aligned, but c might not be */
   if (__M4RI_ALIGNMENT(c,16)==0) {
     __m128i *__c = (__m128i*)c;
   }
   if(!wide)
     return;
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {
   case 0: do { *c++ ^= *t1++ ^ *t2++;
  */
 
 static inline void _mzd_combine(word *c, word const *t1, wi_t wide) {
-#ifdef HAVE_SSE2
+#if __M4RI_HAVE_SSE2
   /* assuming c, t1 are alligned the same way */
 
   if (__M4RI_ALIGNMENT(c,16)==8 && wide) {
 
   if(!wide)
     return;
-#endif //HAVE_SSE2
+#endif // __M4RI_HAVE_SSE2
 
   wi_t n = (wide + 7) / 8;
   switch (wide % 8) {