0432681
committed
Commits
Comments (0)
Files changed (20)

+5 0.dep.inc

+4 0.hgignore

+128 0Makefile

+75 0changelog.txt

+193 0instrset.h

+149 0instrset_detect.cpp

+619 0license.txt

+0 0special.zip

+53 0vectorclass.h

+0 0vectorclass.pdf

+2419 0vectorf128.h

+3004 0vectorf256.h

+1944 0vectorf256e.h

— —vectori128.h

— —vectori256.h

— —vectori256e.h

— —vectormath_common.h

— —vectormath_exp.h

— —vectormath_lib.h

— —vectormath_trig.h
Makefile
changelog.txt
+ * inline implementation of mathematical functions added (vectormath_exp.h vectormath_trig.h vectormath_common.h)
+ * added operators +, -, *, / between floating point vectors and scalars to remove overloading ambiguity
instrset.h
+#define CLANG_VERSION ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__))
instrset_detect.cpp
+#if defined (_MSC_VER) || defined (__INTEL_COMPILER) // Microsoft or Intel compiler, intrin.h included
+#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic
license.txt
special.zip
Binary file added.
vectorclass.h
vectorclass.pdf
Binary file added.
vectorf128.h
+#define FIX_CLANG_VECTOR_ALIAS_AMBIGUITY // clang 3.3 has silent conversion between intrinsic vector types. Will probably be fixed in v. 3.4
+ __m128 mask = _mm_loadu_ps((float const*)(maskl+4-(index & 3))); // mask with FFFFFFFF at index position
+ __m128 mask = _mm_loadu_ps((float const*)(maskl+4-(index&1)*2)); // mask with FFFFFFFFFFFFFFFF at index position
+ __m128 mask = _mm_loadu_ps((float const*)(maskl+4-(index & 3))); // mask with FFFFFFFF at index position
+static inline Vec4f round(Vec4f const & a) __attribute__ ((optimize("-fno-unsafe-math-optimizations")));
+ Vec4f signmask = _mm_castsi128_ps(constant4i<0x80000000,0x80000000,0x80000000,0x80000000>()); // -0.0
+ Vec4f magic = _mm_castsi128_ps(constant4i<0x4B000000,0x4B000000,0x4B000000,0x4B000000>()); // magic number = 2^23
+// approximate reciprocal squareroot (Faster than 1.f / sqrt(a). Relative accuracy better than 2^-11)
.dep.inc