35#ifndef _BLAZE_MATH_SIMD_FMA_H_
36#define _BLAZE_MATH_SIMD_FMA_H_
70 :
public SIMDf32< SIMDf32FmaddExpr<T1,T2,T3> >
97#if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE )
99 return _mm512_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
101#elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
103 return _mm256_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
105#elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
107 return _mm_fmadd_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
134 :
public SIMDf32< SIMDf32FmsubExpr<T1,T2,T3> >
161#if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE )
163 return _mm512_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
165#elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
167 return _mm256_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
169#elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
171 return _mm_fmsub_ps(
a_.eval().value,
b_.eval().value,
c_.eval().value );
229 operator+(
const SIMDf32<T1>& a,
const SIMDf32MultExpr<T2,T3>& b )
231 return SIMDf32FmaddExpr<T2,T3,T1>( b.a_, b.b_, *a );
256 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
258 return SIMDf32FmaddExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
285 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32<T4>& b )
287 return ( a.a_ * a.b_ ) + ( a.c_ + (*b) );
314 operator+(
const SIMDf32<T1>& a,
const SIMDf32FmaddExpr<T2,T3,T4>& b )
316 return ( b.a_ * b.b_ ) + ( b.c_ + (*a) );
344 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32MultExpr<T4,T5>& b )
346 return SIMDf32FmaddExpr< T4, T5, SIMDf32FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
374 operator+(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmaddExpr<T3,T4,T5>& b )
376 return SIMDf32FmaddExpr< T1, T2, SIMDf32FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
404 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
406 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
434 operator+(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
436 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
464 operator+(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
466 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
494 operator+(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
496 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
520 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32<T3>& b )
522 return SIMDf32FmsubExpr<T1,T2,T3>( a.a_, a.b_, *b );
547 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32MultExpr<T3,T4>& b )
549 return SIMDf32FmsubExpr< T1, T2, SIMDf32MultExpr<T3,T4> >( a.a_, a.b_, b );
576 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32<T4>& b )
578 return ( a.a_ * a.b_ ) - ( a.c_ + (*b) );
606 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32MultExpr<T4,T5>& b )
608 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
636 operator-(
const SIMDf32MultExpr<T1,T2>& a,
const SIMDf32FmsubExpr<T3,T4,T5>& b )
638 return SIMDf32FmsubExpr< T1, T2, SIMDf32FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
666 operator-(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
668 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
696 operator-(
const SIMDf32FmaddExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
698 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
726 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmaddExpr<T4,T5,T6>& b )
728 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
756 operator-(
const SIMDf32FmsubExpr<T1,T2,T3>& a,
const SIMDf32FmsubExpr<T4,T5,T6>& b )
758 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
784 :
public SIMDf64< SIMDf64FmaddExpr<T1,T2,T3> >
811#if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE )
813 return _mm512_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
815#elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
817 return _mm256_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
819#elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
821 return _mm_fmadd_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
848 :
public SIMDf64< SIMDf64FmsubExpr<T1,T2,T3> >
875#if BLAZE_FMA_MODE && ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE )
877 return _mm512_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
879#elif BLAZE_FMA_MODE && BLAZE_AVX_MODE
881 return _mm256_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
883#elif BLAZE_FMA_MODE && BLAZE_SSE2_MODE
885 return _mm_fmsub_pd(
a_.eval().value,
b_.eval().value,
c_.eval().value );
943 operator+(
const SIMDf64<T1>& a,
const SIMDf64MultExpr<T2,T3>& b )
945 return SIMDf64FmaddExpr<T2,T3,T1>( b.a_, b.b_, *a );
970 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
972 return SIMDf64FmaddExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
999 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64<T4>& b )
1001 return ( a.a_ * a.b_ ) + ( a.c_ + (*b) );
1023template<
typename T1
1028 operator+(
const SIMDf64<T1>& a,
const SIMDf64FmaddExpr<T2,T3,T4>& b )
1030 return ( b.a_ * b.b_ ) + ( b.c_ + (*a) );
1052template<
typename T1
1058 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64MultExpr<T4,T5>& b )
1060 return SIMDf64FmaddExpr< T4, T5, SIMDf64FmaddExpr<T1,T2,T3> >( b.a_, b.b_, a );
1082template<
typename T1
1088 operator+(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmaddExpr<T3,T4,T5>& b )
1090 return SIMDf64FmaddExpr< T1, T2, SIMDf64FmaddExpr<T3,T4,T5> >( a.a_, a.b_, b );
1111template<
typename T1
1118 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1120 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
1141template<
typename T1
1148 operator+(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1150 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
1171template<
typename T1
1178 operator+(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1180 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
1201template<
typename T1
1208 operator+(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1210 return ( a.a_ * a.b_ ) + ( ( b.a_ * b.b_ ) - ( b.c_ + a.c_ ) );
1230template<
typename T1
1234 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64<T3>& b )
1236 return SIMDf64FmsubExpr<T1,T2,T3>( a.a_, a.b_, *b );
1256template<
typename T1
1261 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64MultExpr<T3,T4>& b )
1263 return SIMDf64FmsubExpr< T1, T2, SIMDf64MultExpr<T3,T4> >( a.a_, a.b_, b );
1285template<
typename T1
1290 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64<T4>& b )
1292 return ( a.a_ * a.b_ ) - ( a.c_ + (*b) );
1314template<
typename T1
1320 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64MultExpr<T4,T5>& b )
1322 return ( a.a_ * a.b_ ) - ( b.a_ * b.b_ + a.c_ );
1344template<
typename T1
1350 operator-(
const SIMDf64MultExpr<T1,T2>& a,
const SIMDf64FmsubExpr<T3,T4,T5>& b )
1352 return SIMDf64FmsubExpr< T1, T2, SIMDf64FmsubExpr<T3,T4,T5> >( a.a_, a.b_, b );
1373template<
typename T1
1380 operator-(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1382 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( b.c_ - a.c_ ) );
1403template<
typename T1
1410 operator-(
const SIMDf64FmaddExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1412 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) - ( a.c_ + b.c_ ) );
1433template<
typename T1
1440 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmaddExpr<T4,T5,T6>& b )
1442 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ + b.c_ ) );
1463template<
typename T1
1470 operator-(
const SIMDf64FmsubExpr<T1,T2,T3>& a,
const SIMDf64FmsubExpr<T4,T5,T6>& b )
1472 return ( a.a_ * a.b_ ) - ( ( b.a_ * b.b_ ) + ( a.c_ - b.c_ ) );
Header file for the basic SIMD types.
constexpr const DenseIterator< Type, AF > operator-(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Subtraction between a DenseIterator and an integral value.
Definition: DenseIterator.h:751
constexpr const DenseIterator< Type, AF > operator+(const DenseIterator< Type, AF > &it, ptrdiff_t inc) noexcept
Addition between a DenseIterator and an integral value.
Definition: DenseIterator.h:719
SIMD type for 64-bit double precision floating point data values.
SIMD type for 32-bit single precision floating point data values.
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the SIMD addition functionality.
Header file for the SIMD multiplication functionality.
Header file for the SIMD subtraction functionality.
Expression object for 32-bit floating point fused multiply-add operations.
Definition: FMA.h:71
SIMDf32< This > BaseType
Base type of this SIMDf32FmaddExpr instance.
Definition: FMA.h:74
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE SIMDf32FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmaddExpr class.
Definition: FMA.h:84
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:115
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:116
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:117
Expression object for 32-bit floating point fused multiply-subtract operations.
Definition: FMA.h:135
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:179
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:180
SIMDf32< This > BaseType
Base type of this SIMDf32FmsubExpr instance.
Definition: FMA.h:138
BLAZE_ALWAYS_INLINE SIMDf32FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf32FmsubExpr class.
Definition: FMA.h:148
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:181
Expression object for 32-bit floating point multiplication operations.
Definition: Mult.h:756
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:798
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:799
Expression object for 64-bit floating point fused multiply-add operations.
Definition: FMA.h:785
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:830
const T3 c_
The right-hand side operand for the addition.
Definition: FMA.h:831
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:829
SIMDf64< This > BaseType
Base type of this SIMDf64FmaddExpr instance.
Definition: FMA.h:788
BLAZE_ALWAYS_INLINE SIMDf64FmaddExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmaddExpr class.
Definition: FMA.h:798
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Expression object for 64-bit floating point fused multiply-subtract operations.
Definition: FMA.h:849
BLAZE_ALWAYS_INLINE SIMDf64FmsubExpr(const T1 &a, const T2 &b, const T3 &c)
Constructor for the SIMDf64FmsubExpr class.
Definition: FMA.h:862
const T1 a_
The left-hand side operand for the multiplication.
Definition: FMA.h:893
SIMDf64< This > BaseType
Base type of this SIMDf64FmsubExpr instance.
Definition: FMA.h:852
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
const T2 b_
The right-hand side operand for the multiplication.
Definition: FMA.h:894
const T3 c_
The right-hand side operand for the subtraction.
Definition: FMA.h:895
Expression object for 64-bit floating point multiplication operations.
Definition: Mult.h:951
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:993
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:994
System settings for the inline keywords.
System settings for the SSE mode.