35 #ifndef _BLAZE_MATH_SIMD_FMA_H_
36 #define _BLAZE_MATH_SIMD_FMA_H_
96 #if BLAZE_FMA_MODE && BLAZE_MIC_MODE
98 return _mm512_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
100 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
102 return _mm256_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
104 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
106 return _mm_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
129 template<
typename T1
159 #if BLAZE_FMA_MODE && BLAZE_MIC_MODE
161 return _mm512_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
163 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
165 return _mm256_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
167 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
169 return _mm_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
198 template<
typename T1
223 template<
typename T1
227 operator+(
const SIMDf32<T1>& a,
const SIMDf32MultExpr<T2,T3>& b )
229 return SIMDf32FmaddExpr<T2,T3,T1>( b.a_, b.b_, ~a );
249 template<
typename T1
254 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
256 return SIMDf32FmaddExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
278 template<
typename T1
283 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32<T4>& b )
285 return ( a.a_ * a.b_ ) + ( a.c_ + (~b) );
307 template<
typename T1
312 operator+(
const SIMDf32<T1>& a,
const SIMDf32FmaddExpr<T2,T3,T4>& b )
314 return ( b.a_ * b.b_ ) + ( b.c_ + (~a) );
336 template<
typename T1
342 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32MultExpr<T4,T5>& b )
344 return SIMDf32FmaddExpr< T4, T5, SIMDf32FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
366 template<
typename T1
372 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmaddExpr<T3,T4,T5>& b )
374 return SIMDf32FmaddExpr< T1, T2, SIMDf32FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
395 template<
typename T1
402 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
404 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
425 template<
typename T1
432 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
434 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
455 template<
typename T1
462 operator+(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
464 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
485 template<
typename T1
492 operator+(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
494 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
514 template<
typename T1
518 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32<T3>& b )
520 return SIMDf32FmsubExpr<T1,T2,T3>( a.a_, a.b_, ~b );
540 template<
typename T1
545 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
547 return SIMDf32FmsubExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
569 template<
typename T1
574 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32<T4>& b )
576 return ( a.a_ * a.b_ ) - ( a.c_ + (~b) );
598 template<
typename T1
604 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32MultExpr<T4,T5>& b )
606 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
628 template<
typename T1
634 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmsubExpr<T3,T4,T5>& b )
636 return SIMDf32FmsubExpr< T1, T2, SIMDf32FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
657 template<
typename T1
664 operator-(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
666 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
687 template<
typename T1
694 operator-(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
696 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
717 template<
typename T1
724 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
726 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
747 template<
typename T1
754 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
756 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
778 template<
typename T1
808 #if BLAZE_FMA_MODE && BLAZE_MIC_MODE
810 return _mm512_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
812 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
814 return _mm256_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
816 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
818 return _mm_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
841 template<
typename T1
871 #if BLAZE_FMA_MODE && BLAZE_MIC_MODE
873 return _mm512_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
875 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
877 return _mm256_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
879 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
881 return _mm_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
910 template<
typename T1
935 template<
typename T1
939 operator+(
const SIMDf64<T1>& a,
const SIMDf64MultExpr<T2,T3>& b )
941 return SIMDf64FmaddExpr<T2,T3,T1>( b.a_, b.b_, ~a );
961 template<
typename T1
966 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
968 return SIMDf64FmaddExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
990 template<
typename T1
995 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64<T4>& b )
997 return ( a.a_ * a.b_ ) + ( a.c_ + (~b) );
1019 template<
typename T1
1024 operator+(
const SIMDf64<T1>& a,
const SIMDf64FmaddExpr<T2,T3,T4>& b )
1026 return ( b.a_ * b.b_ ) + ( b.c_ + (~a) );
1048 template<
typename T1
1054 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64MultExpr<T4,T5>& b )
1056 return SIMDf64FmaddExpr< T4, T5, SIMDf64FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
1078 template<
typename T1
1084 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmaddExpr<T3,T4,T5>& b )
1086 return SIMDf64FmaddExpr< T1, T2, SIMDf64FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
1107 template<
typename T1
1114 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1116 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
1137 template<
typename T1
1144 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1146 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
1167 template<
typename T1
1174 operator+(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1176 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
1197 template<
typename T1
1204 operator+(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1206 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
1226 template<
typename T1
1230 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64<T3>& b )
1232 return SIMDf64FmsubExpr<T1,T2,T3>( a.a_, a.b_, ~b );
1252 template<
typename T1
1257 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
1259 return SIMDf64FmsubExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
1281 template<
typename T1
1286 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64<T4>& b )
1288 return ( a.a_ * a.b_ ) - ( a.c_ + (~b) );
1310 template<
typename T1
1316 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64MultExpr<T4,T5>& b )
1318 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
1340 template<
typename T1
1346 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmsubExpr<T3,T4,T5>& b )
1348 return SIMDf64FmsubExpr< T1, T2, SIMDf64FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
1369 template<
typename T1
1376 operator-(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1378 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
1399 template<
typename T1
1406 operator-(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1408 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
1429 template<
typename T1
1436 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1438 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
1459 template<
typename T1
1466 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1468 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
Expression object for 64-bit floating point fused multiply-add operations. ...
Definition: FMA.h:781
Expression object for 64-bit floating point fused multiply-subtract operations. The SIMDf64FmsubExpr c...
Definition: FMA.h:844
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:826
Expression object for 64-bit floating point multiplication operations. The SIMDf64MultExpr class repre...
Definition: Multiplication.h:710
Expression object for 32-bit floating point fused multiply-subtract operations. The SIMDf32FmsubExpr c...
Definition: FMA.h:132
Header file for the SIMD subtraction functionality.
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:828
const DenseIterator< Type, AF > operator+(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Addition between a DenseIterator and an integral value.
Definition: DenseIterator.h:699
Header file for the SIMD multiplication functionality.
BLAZE_ALWAYS_INLINE SIMDf64FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmaddExpr class.
Definition: FMA.h:795
Expression object for 32-bit floating point fused multiply-add operations. The SIMDf32FmaddExpr class ...
Definition: FMA.h:69
const DenseIterator< Type, AF > operator-(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Subtraction between a DenseIterator and an integral value.
Definition: DenseIterator.h:731
SIMDf64< This > BaseType
Base type of this SIMDf64FmaddExpr instance.
Definition: FMA.h:785
SIMD type for 64-bit double precision floating point data values.
SIMDf32< This > BaseType
Base type of this SIMDf32FmaddExpr instance.
Definition: FMA.h:73
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:754
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Header file for the basic SIMD types.
SIMDf32< This > BaseType
Base type of this SIMDf32FmsubExpr instance.
Definition: FMA.h:136
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:827
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:566
SIMDf64< This > BaseType
Base type of this SIMDf64FmsubExpr instance.
Definition: FMA.h:848
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:889
BLAZE_ALWAYS_INLINE SIMDf32FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmsubExpr class.
Definition: FMA.h:146
SIMD type for 32-bit single precision floating point data values.
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:114
BLAZE_ALWAYS_INLINE SIMDf64FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmsubExpr class.
Definition: FMA.h:858
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:177
Header file for the SIMD addition functionality.
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
System settings for the SSE mode.
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:178
Expression object for 32-bit floating point multiplication operations. The SIMDf32MultExpr class repre...
Definition: Multiplication.h:523
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:567
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:115
System settings for the inline keywords.
BLAZE_ALWAYS_INLINE SIMDf32FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmaddExpr class.
Definition: FMA.h:83
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:890
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:753
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:179
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:891
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:116