Commits

Anonymous committed 1a00450

Inlining a couple of frequently called functions; this should help a bit.

Comments (0)

Files changed (4)

   }
 }
 
-void *m4ri_mm_calloc( int count, int size ) {
-#ifdef HAVE_MM_MALLOC
-  void *newthing = _mm_malloc(count*size, 16);
-#else
-  void *newthing = calloc(count, size);
-#endif
-  if (newthing==NULL) {
-    m4ri_die("m4ri_mm_calloc: calloc returned NULL\n");
-    return NULL; /* unreachable. */
-  }
-#ifdef HAVE_MM_MALLOC
-  char *b = (char*)newthing;
-  memset(b, 0, count*size);
-#endif
-  return newthing;
-}
-
-void *m4ri_mm_malloc( int size ) {
-#ifdef HAVE_MM_MALLOC
-  void *newthing = _mm_malloc(size, 16);
-#else
-  void *newthing=malloc( size );
-#endif  
-  if (newthing==NULL && (size>0)) {
-    m4ri_die("m4ri_mm_malloc: malloc returned NULL\n");
-    return NULL; /* unreachable */
-  }
-  else return newthing;
-}
-
-void m4ri_mm_free(void *condemned, ...) { 
-#ifdef HAVE_MM_MALLOC
-  _mm_free(condemned); 
-#else
-  free(condemned);
-#endif  
-}
-
 #define RAND_SHORT ((word)(rand()&((1<<16)-1)))
 
 word m4ri_random_word() {
  * \todo Allow user to register calloc function.
  */
 
-void *m4ri_mm_calloc( int count, int size );
+/* void *m4ri_mm_calloc( int count, int size ); */
+static inline void *m4ri_mm_calloc( int count, int size ) {
+  /*
+   * Allocate a zero-filled buffer of count*size bytes.
+   *
+   * m4ri_die() is called when an argument is negative, when count*size
+   * does not fit in a size_t, or when the allocator returns NULL for a
+   * non-empty request.  A zero-byte request may legitimately return NULL,
+   * matching what m4ri_mm_malloc() tolerates.
+   */
+  if (count < 0 || size < 0) {
+    m4ri_die("m4ri_mm_calloc: negative count or size\n");
+    return NULL; /* unreachable. */
+  }
+  const size_t nmemb = (size_t)count;
+  const size_t width = (size_t)size;
+#ifdef HAVE_MM_MALLOC
+  /* _mm_malloc() takes one byte count, so the overflow check that calloc()
+   * performs internally has to be done by hand here. */
+  if (width != 0 && nmemb > ((size_t)-1) / width) {
+    m4ri_die("m4ri_mm_calloc: count*size overflows\n");
+    return NULL; /* unreachable. */
+  }
+  const size_t total = nmemb * width;
+  void *newthing = _mm_malloc(total, 16);
+#else
+  void *newthing = calloc(nmemb, width);
+#endif
+  if (newthing==NULL && nmemb != 0 && width != 0) {
+    m4ri_die("m4ri_mm_calloc: calloc returned NULL\n");
+    return NULL; /* unreachable. */
+  }
+#ifdef HAVE_MM_MALLOC
+  /* _mm_malloc() does not clear the memory; do it explicitly. */
+  if (newthing != NULL)
+    memset(newthing, 0, total);
+#endif
+  return newthing;
+}
 
 /**
  * \brief Malloc wrapper.
  * \todo Allow user to register malloc function.
  */
 
-void *m4ri_mm_malloc( int size );
+/* void *m4ri_mm_malloc( int size ); */
+static inline void *m4ri_mm_malloc( int size ) {
+  /*
+   * Allocate size bytes (16-byte aligned when HAVE_MM_MALLOC is defined).
+   *
+   * m4ri_die() is called when size is negative or when a non-empty
+   * request cannot be satisfied.  For size == 0 the allocator's result is
+   * passed through unchanged and may be NULL.
+   */
+  if (size < 0) {
+    /* A negative int would otherwise turn into a huge size_t and the
+     * resulting failure would be masked by the size>0 test below. */
+    m4ri_die("m4ri_mm_malloc: negative size\n");
+    return NULL; /* unreachable */
+  }
+#ifdef HAVE_MM_MALLOC
+  void *newthing = _mm_malloc((size_t)size, 16);
+#else
+  void *newthing = malloc((size_t)size);
+#endif
+  if (newthing==NULL && size>0) {
+    m4ri_die("m4ri_mm_malloc: malloc returned NULL\n");
+    return NULL; /* unreachable */
+  }
+  return newthing;
+}
+
 
 /**
  * \brief Free wrapper.
  * \todo Allow user to register free function.
  */
 
-void m4ri_mm_free(void *condemned, ...);
+/* void m4ri_mm_free(void *condemned, ...); */
+static inline void m4ri_mm_free(void *condemned, ...) {
+  /* Release a buffer using the deallocator that matches the configured
+   * allocator: free() by default, _mm_free() when HAVE_MM_MALLOC is set.
+   * Any additional variadic arguments are ignored. */
+#ifndef HAVE_MM_MALLOC
+  free(condemned);
+#else
+  _mm_free(condemned);
+#endif
+}
 
 /**
  * \brief Enable memory block cache (default: disabled)

src/packedmatrix.c

   }
 }
 
-void mzd_row_add_offset(packedmatrix *M, size_t dstrow, size_t srcrow, size_t coloffset) {
-  coloffset += M->offset;
-  const size_t startblock= coloffset/RADIX;
-  size_t wide = M->width - startblock;
-  word *src = M->values + M->rowswap[srcrow] + startblock;
-  word *dst = M->values + M->rowswap[dstrow] + startblock;
-
-  word temp = *src++;
-  if (coloffset%RADIX)
-    temp = RIGHTMOST_BITS(temp, (RADIX-(coloffset%RADIX)-1));
-  *dst++ ^= temp;
-  wide--;
-
-#ifdef HAVE_SSE2 
-  if (ALIGNMENT(src,16)==8 && wide) {
-    *dst++ ^= *src++;
-    wide--;
-  }
-  __m128i *__src = (__m128i*)src;
-  __m128i *__dst = (__m128i*)dst;
-  const __m128i *eof = (__m128i*)((unsigned long)(src + wide) & ~0xF);
-  __m128i xmm1;
-  
-  while(__src < eof) {
-    xmm1 = _mm_xor_si128(*__dst, *__src++);
-    *__dst++ = xmm1;
-  }
-  src  = (word*)__src;
-  dst = (word*)__dst;
-  wide = ((sizeof(word)*wide)%16)/sizeof(word);
-#endif
-  size_t i;
-  for(i=0; i<wide; i++) {
-    dst[i] ^= src[i];
-  }
-}
-
 void mzd_row_add( packedmatrix *m, size_t sourcerow, size_t destrow) {
   /* Whole-row addition is the offset variant starting at column 0; note
    * that mzd_row_add_offset() takes the destination row first. */
   const size_t first_column = 0;
   mzd_row_add_offset(m, destrow, sourcerow, first_column);
 }

src/packedmatrix.h

  * \param coloffset Column offset
  */
 
-void mzd_row_add_offset(packedmatrix *M, size_t dstrow, size_t srcrow, size_t coloffset);
+/*void mzd_row_add_offset(packedmatrix *M, size_t dstrow, size_t srcrow, size_t coloffset); */
+static inline void mzd_row_add_offset(packedmatrix *M, size_t dstrow, size_t srcrow, size_t coloffset) {
+  /*
+   * XOR row srcrow of M into row dstrow, starting at column coloffset
+   * (taken relative to M->offset) and running to the end of the row.
+   *
+   * The first affected word is masked with RIGHTMOST_BITS when the start
+   * column is not word-aligned; the remaining words are XORed whole, using
+   * 128-bit SSE2 operations where HAVE_SSE2 is defined.
+   * NOTE(review): assumes coloffset lies inside the row, so that at least
+   * one word remains (M->width > startblock) -- confirm against callers.
+   */
+  coloffset += M->offset;
+  const size_t startblock= coloffset/RADIX;
+  size_t wide = M->width - startblock;
+  word *src = M->values + M->rowswap[srcrow] + startblock;
+  word *dst = M->values + M->rowswap[dstrow] + startblock;
+
+  /* First word: only part of it may belong to the requested column range. */
+  word temp = *src++;
+  if (coloffset%RADIX)
+    temp = RIGHTMOST_BITS(temp, (RADIX-(coloffset%RADIX)-1));
+  *dst++ ^= temp;
+  wide--;
+
+#ifdef HAVE_SSE2 
+  /* Peel one word if src sits 8 bytes past a 16-byte boundary, so that the
+   * vector loop below starts on a 16-byte boundary. */
+  if (ALIGNMENT(src,16)==8 && wide) {
+    *dst++ ^= *src++;
+    wide--;
+  }
+  __m128i *vsrc = (__m128i*)src;
+  __m128i *vdst = (__m128i*)dst;
+  /* Number of complete 128-bit chunks left in the row.  Using pointer
+   * arithmetic here avoids round-tripping the address through
+   * unsigned long, which is narrower than a pointer on LLP64 targets. */
+  const __m128i *vend = vsrc + (sizeof(word)*wide)/sizeof(__m128i);
+
+  while(vsrc < vend) {
+    *vdst = _mm_xor_si128(*vdst, *vsrc);
+    vdst++;
+    vsrc++;
+  }
+  src = (word*)vsrc;
+  dst = (word*)vdst;
+  /* Words that did not fill a whole 128-bit chunk. */
+  wide = ((sizeof(word)*wide)%sizeof(__m128i))/sizeof(word);
+#endif
+  size_t i;
+  for(i=0; i<wide; i++) {
+    dst[i] ^= src[i];
+  }
+}
 
 /**
  * \brief Add the rows sourcerow and destrow and stores the total in