35 #ifndef _BLAZE_MATH_SIMD_FMA_H_ 36 #define _BLAZE_MATH_SIMD_FMA_H_ 70 :
public SIMDf32< SIMDf32FmaddExpr<T1,T2,T3> >
// Instruction-set dispatch for the 32-bit fused multiply-add a_ * b_ + c_ (presumably the body of
// eval(); the enclosing signature is not visible in this listing -- confirm). Every branch requires
// BLAZE_FMA_MODE and hands the `value` member of each evaluated operand to the intrinsic in
// (a, b, c) order.
// AVX-512F / MIC build: _mm512_fmadd_ps.
97 #if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) 99 return _mm512_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// AVX build: _mm256_fmadd_ps.
101 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE 103 return _mm256_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SSE2 build: _mm_fmadd_ps.
105 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE 107 return _mm_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SIMDf32FmsubExpr: expression object for the 32-bit fused multiply-subtract a_ * b_ - c_.
// It derives from SIMDf32 parameterized on its own type.
130 template<
typename T1
134 :
public SIMDf32< SIMDf32FmsubExpr<T1,T2,T3> >
// Instruction-set dispatch (presumably the body of eval() -- signature not visible here). Every
// branch requires BLAZE_FMA_MODE and passes the evaluated operands' `value` members in (a, b, c)
// order.
// AVX-512F / MIC build: _mm512_fmsub_ps.
161 #if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) 163 return _mm512_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// AVX build: _mm256_fmsub_ps.
165 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE 167 return _mm256_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SSE2 build: _mm_fmsub_ps.
169 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE 171 return _mm_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
200 template<
typename T1
// operand + product (32-bit): folds a + ( b.a_ * b.b_ ) into a single fused multiply-add expression.
// The product's factors become the multiplicands and `a` becomes the addend, hence the template
// argument order <T2,T3,T1>. `~a` presumably unwraps the SIMDf32 base to the concrete operand --
// confirm against the SIMDf32 definition.
225 template<
typename T1
229 operator+(
const SIMDf32<T1>& a,
const SIMDf32MultExpr<T2,T3>& b )
231 return SIMDf32FmaddExpr<T2,T3,T1>( b.a_, b.b_, ~a );
// product + product (32-bit): the left product supplies the multiplicands and the whole right
// product `b` is stored, unevaluated, as the addend: a.a_ * a.b_ + b.
251 template<
typename T1
256 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
258 return SIMDf32FmaddExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
// fmadd + operand (32-bit): ( a.a_*a.b_ + a.c_ ) + b is regrouped as ( a.a_*a.b_ ) + ( a.c_ + b ).
// The two plain addends are summed first and the rebuilt product is added to that sum, presumably
// so the outer + is again matched by a product-plus-operand overload -- confirm.
280 template<
typename T1
285 operator+( const SIMDf32FmaddExpr<T1,T2,T3>& a, const SIMDf32<T4>& b )
287 return ( a.a_ * a.b_ ) + ( a.c_ + (~b) );
// operand + fmadd (32-bit): mirror image of the fmadd + operand case.
// a + ( b.a_*b.b_ + b.c_ ) is regrouped as ( b.a_*b.b_ ) + ( b.c_ + a ).
309 template<
typename T1
314 operator+( const SIMDf32<T1>& a, const SIMDf32FmaddExpr<T2,T3,T4>& b )
316 return ( b.a_ * b.b_ ) + ( b.c_ + (~a) );
// fmadd + product (32-bit): the product `b` supplies the multiplicands and the complete fused
// expression `a` is nested as the addend: b.a_ * b.b_ + a.
338 template<
typename T1
344 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32MultExpr<T4,T5>& b )
346 return SIMDf32FmaddExpr< T4, T5, SIMDf32FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
// product + fmadd (32-bit): the product `a` supplies the multiplicands and the complete fused
// expression `b` is nested as the addend: a.a_ * a.b_ + b.
368 template<
typename T1
374 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmaddExpr<T3,T4,T5>& b )
376 return SIMDf32FmaddExpr< T1, T2, SIMDf32FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
// fmadd + fmadd (32-bit): ( a.a_*a.b_ + a.c_ ) + ( b.a_*b.b_ + b.c_ ) is regrouped so both plain
// addends are combined innermost: a.a_*a.b_ + ( b.a_*b.b_ + ( a.c_ + b.c_ ) ).
397 template<
typename T1
404 operator+( const SIMDf32FmaddExpr<T1,T2,T3>& a, const SIMDf32FmaddExpr<T4,T5,T6>& b )
406 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
// fmadd + fmsub (32-bit): ( a.a_*a.b_ + a.c_ ) + ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ + ( a.c_ - b.c_ ) ); b.c_ keeps its negative sign inside the inner term.
427 template<
typename T1
434 operator+( const SIMDf32FmaddExpr<T1,T2,T3>& a, const SIMDf32FmsubExpr<T4,T5,T6>& b )
436 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
// fmsub + fmadd (32-bit): ( a.a_*a.b_ - a.c_ ) + ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ + ( b.c_ - a.c_ ) ); a.c_ is the subtracted term here.
457 template<
typename T1
464 operator+( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32FmaddExpr<T4,T5,T6>& b )
466 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
// fmsub + fmsub (32-bit): ( a.a_*a.b_ - a.c_ ) + ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ - ( b.c_ + a.c_ ) ); both subtrahends are summed and subtracted once.
487 template<
typename T1
494 operator+( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32FmsubExpr<T4,T5,T6>& b )
496 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
// product - operand (32-bit): folds ( a.a_ * a.b_ ) - b into a single fused multiply-subtract
// expression. `~b` presumably unwraps the SIMDf32 base to the concrete operand -- confirm against
// the SIMDf32 definition.
516 template<
typename T1
520 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32<T3>& b )
522 return SIMDf32FmsubExpr<T1,T2,T3>( a.a_, a.b_, ~b );
// product - product (32-bit): the left product supplies the multiplicands and the whole right
// product `b` is stored, unevaluated, as the subtrahend: a.a_ * a.b_ - b.
542 template<
typename T1
547 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
549 return SIMDf32FmsubExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
// fmsub - operand (32-bit): ( a.a_*a.b_ - a.c_ ) - b is regrouped as ( a.a_*a.b_ ) - ( a.c_ + b ),
// i.e. both subtrahends are summed first and removed from the product in one step.
571 template<
typename T1
576 operator-( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32<T4>& b )
578 return ( a.a_ * a.b_ ) - ( a.c_ + (~b) );
// fmsub - product (32-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ ) is regrouped as
// ( a.a_*a.b_ ) - ( b.a_*b.b_ + a.c_ ).
600 template<
typename T1
606 operator-( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32MultExpr<T4,T5>& b )
608 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
// product - fmsub (32-bit): the product `a` supplies the multiplicands and the complete fused
// multiply-subtract expression `b` is nested as the subtrahend: a.a_ * a.b_ - b.
630 template<
typename T1
636 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmsubExpr<T3,T4,T5>& b )
638 return SIMDf32FmsubExpr< T1, T2, SIMDf32FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
// fmadd - fmadd (32-bit): ( a.a_*a.b_ + a.c_ ) - ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( b.c_ - a.c_ ) ); a.c_ enters the subtracted group with flipped sign.
659 template<
typename T1
666 operator-( const SIMDf32FmaddExpr<T1,T2,T3>& a, const SIMDf32FmaddExpr<T4,T5,T6>& b )
668 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
// fmadd - fmsub (32-bit): ( a.a_*a.b_ + a.c_ ) - ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ - ( a.c_ + b.c_ ) ); both plain terms end up added overall.
689 template<
typename T1
696 operator-( const SIMDf32FmaddExpr<T1,T2,T3>& a, const SIMDf32FmsubExpr<T4,T5,T6>& b )
698 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
// fmsub - fmadd (32-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( a.c_ + b.c_ ) ); both plain terms end up subtracted overall.
719 template<
typename T1
726 operator-( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32FmaddExpr<T4,T5,T6>& b )
728 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
// fmsub - fmsub (32-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( a.c_ - b.c_ ) ).
749 template<
typename T1
756 operator-( const SIMDf32FmsubExpr<T1,T2,T3>& a, const SIMDf32FmsubExpr<T4,T5,T6>& b )
758 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
// SIMDf64FmaddExpr: expression object for the 64-bit fused multiply-add a_ * b_ + c_.
// It derives from SIMDf64 parameterized on its own type.
780 template<
typename T1
784 :
public SIMDf64< SIMDf64FmaddExpr<T1,T2,T3> >
// Instruction-set dispatch (presumably the body of eval() -- signature not visible here). Every
// branch requires BLAZE_FMA_MODE and passes the evaluated operands' `value` members in (a, b, c)
// order.
// AVX-512F / MIC build: _mm512_fmadd_pd.
811 #if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) 813 return _mm512_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// AVX build: _mm256_fmadd_pd.
815 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE 817 return _mm256_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SSE2 build: _mm_fmadd_pd.
819 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE 821 return _mm_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SIMDf64FmsubExpr: expression object for the 64-bit fused multiply-subtract a_ * b_ - c_.
// It derives from SIMDf64 parameterized on its own type.
844 template<
typename T1
848 :
public SIMDf64< SIMDf64FmsubExpr<T1,T2,T3> >
// Instruction-set dispatch (presumably the body of eval() -- signature not visible here). Every
// branch requires BLAZE_FMA_MODE and passes the evaluated operands' `value` members in (a, b, c)
// order.
// AVX-512F / MIC build: _mm512_fmsub_pd.
875 #if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) 877 return _mm512_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// AVX build: _mm256_fmsub_pd.
879 #elif BLAZE_FMA_MODE && BLAZE_AVX_MODE 881 return _mm256_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
// SSE2 build: _mm_fmsub_pd.
883 #elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE 885 return _mm_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
914 template<
typename T1
// operand + product (64-bit): folds a + ( b.a_ * b.b_ ) into a single fused multiply-add expression.
// The product's factors become the multiplicands and `a` becomes the addend, hence the template
// argument order <T2,T3,T1>. `~a` presumably unwraps the SIMDf64 base to the concrete operand --
// confirm against the SIMDf64 definition.
939 template<
typename T1
943 operator+(
const SIMDf64<T1>& a,
const SIMDf64MultExpr<T2,T3>& b )
945 return SIMDf64FmaddExpr<T2,T3,T1>( b.a_, b.b_, ~a );
// product + product (64-bit): the left product supplies the multiplicands and the whole right
// product `b` is stored, unevaluated, as the addend: a.a_ * a.b_ + b.
965 template<
typename T1
970 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
972 return SIMDf64FmaddExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
// fmadd + operand (64-bit): ( a.a_*a.b_ + a.c_ ) + b is regrouped as ( a.a_*a.b_ ) + ( a.c_ + b ).
// The two plain addends are summed first and the rebuilt product is added to that sum, presumably
// so the outer + is again matched by a product-plus-operand overload -- confirm.
994 template<
typename T1
999 operator+( const SIMDf64FmaddExpr<T1,T2,T3>& a, const SIMDf64<T4>& b )
1001 return ( a.a_ * a.b_ ) + ( a.c_ + (~b) );
// operand + fmadd (64-bit): mirror image of the fmadd + operand case.
// a + ( b.a_*b.b_ + b.c_ ) is regrouped as ( b.a_*b.b_ ) + ( b.c_ + a ).
1023 template<
typename T1
1028 operator+( const SIMDf64<T1>& a, const SIMDf64FmaddExpr<T2,T3,T4>& b )
1030 return ( b.a_ * b.b_ ) + ( b.c_ + (~a) );
// fmadd + product (64-bit): the product `b` supplies the multiplicands and the complete fused
// expression `a` is nested as the addend: b.a_ * b.b_ + a.
1052 template<
typename T1
1058 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64MultExpr<T4,T5>& b )
1060 return SIMDf64FmaddExpr< T4, T5, SIMDf64FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
// product + fmadd (64-bit): the product `a` supplies the multiplicands and the complete fused
// expression `b` is nested as the addend: a.a_ * a.b_ + b.
1082 template<
typename T1
1088 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmaddExpr<T3,T4,T5>& b )
1090 return SIMDf64FmaddExpr< T1, T2, SIMDf64FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
// fmadd + fmadd (64-bit): ( a.a_*a.b_ + a.c_ ) + ( b.a_*b.b_ + b.c_ ) is regrouped so both plain
// addends are combined innermost: a.a_*a.b_ + ( b.a_*b.b_ + ( a.c_ + b.c_ ) ).
1111 template<
typename T1
1118 operator+( const SIMDf64FmaddExpr<T1,T2,T3>& a, const SIMDf64FmaddExpr<T4,T5,T6>& b )
1120 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
// fmadd + fmsub (64-bit): ( a.a_*a.b_ + a.c_ ) + ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ + ( a.c_ - b.c_ ) ); b.c_ keeps its negative sign inside the inner term.
1141 template<
typename T1
1148 operator+( const SIMDf64FmaddExpr<T1,T2,T3>& a, const SIMDf64FmsubExpr<T4,T5,T6>& b )
1150 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
// fmsub + fmadd (64-bit): ( a.a_*a.b_ - a.c_ ) + ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ + ( b.c_ - a.c_ ) ); a.c_ is the subtracted term here.
1171 template<
typename T1
1178 operator+( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64FmaddExpr<T4,T5,T6>& b )
1180 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
// fmsub + fmsub (64-bit): ( a.a_*a.b_ - a.c_ ) + ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ + ( b.a_*b.b_ - ( b.c_ + a.c_ ) ); both subtrahends are summed and subtracted once.
1201 template<
typename T1
1208 operator+( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64FmsubExpr<T4,T5,T6>& b )
1210 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
// product - operand (64-bit): folds ( a.a_ * a.b_ ) - b into a single fused multiply-subtract
// expression. `~b` presumably unwraps the SIMDf64 base to the concrete operand -- confirm against
// the SIMDf64 definition.
1230 template<
typename T1
1234 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64<T3>& b )
1236 return SIMDf64FmsubExpr<T1,T2,T3>( a.a_, a.b_, ~b );
// product - product (64-bit): the left product supplies the multiplicands and the whole right
// product `b` is stored, unevaluated, as the subtrahend: a.a_ * a.b_ - b.
1256 template<
typename T1
1261 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
1263 return SIMDf64FmsubExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
// fmsub - operand (64-bit): ( a.a_*a.b_ - a.c_ ) - b is regrouped as ( a.a_*a.b_ ) - ( a.c_ + b ),
// i.e. both subtrahends are summed first and removed from the product in one step.
1285 template<
typename T1
1290 operator-( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64<T4>& b )
1292 return ( a.a_ * a.b_ ) - ( a.c_ + (~b) );
// fmsub - product (64-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ ) is regrouped as
// ( a.a_*a.b_ ) - ( b.a_*b.b_ + a.c_ ).
1314 template<
typename T1
1320 operator-( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64MultExpr<T4,T5>& b )
1322 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
// product - fmsub (64-bit): the product `a` supplies the multiplicands and the complete fused
// multiply-subtract expression `b` is nested as the subtrahend: a.a_ * a.b_ - b.
1344 template<
typename T1
1350 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmsubExpr<T3,T4,T5>& b )
1352 return SIMDf64FmsubExpr< T1, T2, SIMDf64FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
// fmadd - fmadd (64-bit): ( a.a_*a.b_ + a.c_ ) - ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( b.c_ - a.c_ ) ); a.c_ enters the subtracted group with flipped sign.
1373 template<
typename T1
1380 operator-( const SIMDf64FmaddExpr<T1,T2,T3>& a, const SIMDf64FmaddExpr<T4,T5,T6>& b )
1382 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
// fmadd - fmsub (64-bit): ( a.a_*a.b_ + a.c_ ) - ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ - ( a.c_ + b.c_ ) ); both plain terms end up added overall.
1403 template<
typename T1
1410 operator-( const SIMDf64FmaddExpr<T1,T2,T3>& a, const SIMDf64FmsubExpr<T4,T5,T6>& b )
1412 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
// fmsub - fmadd (64-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ + b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( a.c_ + b.c_ ) ); both plain terms end up subtracted overall.
1433 template<
typename T1
1440 operator-( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64FmaddExpr<T4,T5,T6>& b )
1442 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
// fmsub - fmsub (64-bit): ( a.a_*a.b_ - a.c_ ) - ( b.a_*b.b_ - b.c_ ) is regrouped as
// a.a_*a.b_ - ( b.a_*b.b_ + ( a.c_ - b.c_ ) ).
1463 template<
typename T1
1470 operator-( const SIMDf64FmsubExpr<T1,T2,T3>& a, const SIMDf64FmsubExpr<T4,T5,T6>& b )
1472 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
Addition operator for fusing a 32-bit floating point multiplication and addition.
Definition: FMA.h:783
Expression object for 64-bit floating point fused multiply-subtract operations. The SIMDf64FmsubExpr c...
Definition: FMA.h:847
constexpr const DenseIterator< Type, AF > operator-(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Subtraction between a DenseIterator and an integral value.
Definition: DenseIterator.h:750
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:829
Expression object for 64-bit floating point multiplication operations. The SIMDf64MultExpr class repre...
Definition: Mult.h:949
Expression object for 32-bit floating point fused multiply-subtract operations. The SIMDf32FmsubExpr c...
Definition: FMA.h:133
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:831
Header file for the SIMD multiplication functionality.
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE SIMDf64FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmaddExpr class.
Definition: FMA.h:798
Expression object for 32-bit floating point fused multiply-add operations. The SIMDf32FmaddExpr class ...
Definition: FMA.h:69
SIMDf64< This > BaseType
Base type of this SIMDf64FmaddExpr instance.
Definition: FMA.h:788
SIMD type for 64-bit double precision floating point data values.
Header file for the SIMD addition functionality.
SIMDf32< This > BaseType
Base type of this SIMDf32FmaddExpr instance.
Definition: FMA.h:74
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Header file for the SIMD subtraction functionality.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:994
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
Header file for the basic SIMD types.
SIMDf32< This > BaseType
Base type of this SIMDf32FmsubExpr instance.
Definition: FMA.h:138
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:830
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:798
SIMDf64< This > BaseType
Base type of this SIMDf64FmsubExpr instance.
Definition: FMA.h:852
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:893
BLAZE_ALWAYS_INLINE SIMDf32FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmsubExpr class.
Definition: FMA.h:148
constexpr const DenseIterator< Type, AF > operator+(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Addition between a DenseIterator and an integral value.
Definition: DenseIterator.h:718
SIMD type for 32-bit single precision floating point data values.
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:115
BLAZE_ALWAYS_INLINE SIMDf64FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmsubExpr class.
Definition: FMA.h:862
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:179
System settings for the SSE mode.
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:180
Expression object for 32-bit floating point multiplication operations. The SIMDf32MultExpr class repre...
Definition: Mult.h:754
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:799
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:116
System settings for the inline keywords.
BLAZE_ALWAYS_INLINE SIMDf32FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmaddExpr class.
Definition: FMA.h:84
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:894
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:993
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:181
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:895
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:117