35 #ifndef _BLAZE_MATH_SIMD_MULTIPLICATION_H_
36 #define _BLAZE_MATH_SIMD_MULTIPLICATION_H_
// Multiplication of two 16-bit integral SIMD operands of the same type T.
// Both visible branches do a lane-wise 16-bit multiply that keeps the low
// 16 bits of each product: first the 256-bit form, then the 128-bit form.
// NOTE(review): the return type, the braces and the preprocessor guards that
// select between the two returns are not visible in this listing.
66 template<
typename T >
68 operator*(
const SIMDi16<T>& a,
const SIMDi16<T>& b ) noexcept
// 256-bit variant.
71 return _mm256_mullo_epi16( (~a).value, (~b).value );
// 128-bit variant.
75 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of two 16-bit integral SIMD operands of possibly different
// types T1 and T2. Uses the same low-16-bit lane-wise multiply as the
// same-type overload.
// NOTE(review): the template header, return type and the opening preprocessor
// guard are not visible in this listing.
96 operator*(
const SIMDi16<T1>& a,
const SIMDi16<T2>& b ) noexcept
// 256-bit variant (its guard is not visible here).
99 return _mm256_mullo_epi16( (~a).value, (~b).value );
101 #elif BLAZE_SSE2_MODE
// 128-bit variant, selected when BLAZE_SSE2_MODE is set.
103 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of a SIMDcint16 operand (left) by a SIMDint16 operand
// (right). Implemented as a plain lane-wise low-16-bit multiply of the two
// registers.
// NOTE(review): this is only a correct complex-by-scalar scaling if the
// SIMDint16 operand carries the scalar in both lanes of each complex pair -
// presumably guaranteed by the caller; confirm. The return type is not
// visible in this listing.
122 operator*(
const SIMDcint16& a,
const SIMDint16& b ) noexcept
// 256-bit variant (its guard is not visible here).
125 return _mm256_mullo_epi16( (~a).value, (~b).value );
127 #elif BLAZE_SSE2_MODE
// 128-bit variant.
129 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of a SIMDcuint16 operand (left) by a SIMDuint16 operand
// (right). Implemented as a plain lane-wise low-16-bit multiply; the same
// intrinsic is used as in the signed overloads.
// NOTE(review): the return type and the opening preprocessor guard are not
// visible in this listing.
148 operator*(
const SIMDcuint16& a,
const SIMDuint16& b ) noexcept
// 256-bit variant (its guard is not visible here).
151 return _mm256_mullo_epi16( (~a).value, (~b).value );
153 #elif BLAZE_SSE2_MODE
// 128-bit variant.
155 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of a SIMDint16 operand (left) by a SIMDcint16 operand
// (right); the mirror image of the SIMDcint16 * SIMDint16 overload.
// Implemented as a plain lane-wise low-16-bit multiply.
// NOTE(review): the return type and the opening preprocessor guard are not
// visible in this listing.
174 operator*(
const SIMDint16& a,
const SIMDcint16& b ) noexcept
// 256-bit variant (its guard is not visible here).
177 return _mm256_mullo_epi16( (~a).value, (~b).value );
179 #elif BLAZE_SSE2_MODE
// 128-bit variant.
181 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of a SIMDuint16 operand (left) by a SIMDcuint16 operand
// (right); the mirror image of the SIMDcuint16 * SIMDuint16 overload.
// Implemented as a plain lane-wise low-16-bit multiply.
// NOTE(review): the return type and the opening preprocessor guard are not
// visible in this listing.
200 operator*(
const SIMDuint16& a,
const SIMDcuint16& b ) noexcept
// 256-bit variant (its guard is not visible here).
203 return _mm256_mullo_epi16( (~a).value, (~b).value );
205 #elif BLAZE_SSE2_MODE
// 128-bit variant.
207 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication of two 16-bit integral complex SIMD operands of the same
// type T.
// Assumes each pair of adjacent 16-bit lanes holds (real, imaginary) of one
// complex value - TODO confirm against the SIMD type definitions. Under that
// assumption each pair of the result is
//    ( ar*br - ai*bi , ar*bi + ai*br ).
// NOTE(review): the return type, the declarations of x, y and z, and the
// opening preprocessor guard are not visible in this listing.
225 template<
typename T >
227 operator*(
const SIMDci16<T>& a,
const SIMDci16<T>& b ) noexcept
// ---- 256-bit variant ----
// Sign mask: _mm256_set_epi16 takes its arguments from the highest lane down
// to the lowest, so even lanes receive -1 and odd lanes receive +1.
231 const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
// 0xA0 selects elements (0,0,2,2) of each 64-bit half: duplicate the even
// lanes of a. shufflelo/shufflehi cover the low/high half of every 128 bits.
233 x = _mm256_shufflelo_epi16( (~a).value, 0xA0 );
234 x = _mm256_shufflehi_epi16( x, 0xA0 );
// z = even lanes of a times b.
235 z = _mm256_mullo_epi16( x, (~b).value );
// 0xF5 selects elements (1,1,3,3): duplicate the odd lanes of a.
236 x = _mm256_shufflelo_epi16( (~a).value, 0xF5 );
237 x = _mm256_shufflehi_epi16( x, 0xF5 );
// 0xB1 selects elements (1,0,3,2): swap the two lanes of every pair of b.
238 y = _mm256_shufflelo_epi16( (~b).value, 0xB1 );
239 y = _mm256_shufflehi_epi16( y, 0xB1 );
// y = odd lanes of a times pair-swapped b ...
240 y = _mm256_mullo_epi16( x, y );
// ... negated in the even lanes only.
241 y = _mm256_mullo_epi16( y, neg );
// Sum of the two partial products.
242 return _mm256_add_epi16( z, y );
244 #elif BLAZE_SSE2_MODE
// ---- 128-bit variant: same steps on a __m128i register ----
247 const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );
// Duplicate the even lanes of a.
249 x = _mm_shufflelo_epi16( (~a).value, 0xA0 );
250 x = _mm_shufflehi_epi16( x, 0xA0 );
251 z = _mm_mullo_epi16( x, (~b).value );
// Duplicate the odd lanes of a.
252 x = _mm_shufflelo_epi16( (~a).value, 0xF5 );
253 x = _mm_shufflehi_epi16( x, 0xF5 );
// Swap the two lanes of every pair of b.
254 y = _mm_shufflelo_epi16( (~b).value, 0xB1 );
255 y = _mm_shufflehi_epi16( y, 0xB1 );
256 y = _mm_mullo_epi16( x, y );
// Negate the even lanes of the second partial product.
257 y = _mm_mullo_epi16( y, neg );
258 return _mm_add_epi16( z, y );
// Multiplication of two 32-bit integral SIMD operands of the same type T.
// Every branch does a lane-wise 32-bit multiply that keeps the low 32 bits
// of each product, at 512-, 256- or 128-bit register width.
// NOTE(review): the return type and the guard of the 512-bit branch are not
// visible in this listing.
284 template<
typename T >
286 operator*(
const SIMDi32<T>& a,
const SIMDi32<T>& b ) noexcept
// 512-bit variant (its guard is not visible here).
289 return _mm512_mullo_epi32( (~a).value, (~b).value );
291 #elif BLAZE_AVX2_MODE
// 256-bit variant.
293 return _mm256_mullo_epi32( (~a).value, (~b).value );
295 #elif BLAZE_SSE4_MODE
// 128-bit variant.
297 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of two 32-bit integral SIMD operands of possibly different
// types T1 and T2. Uses the same low-32-bit lane-wise multiply as the
// same-type overload.
// NOTE(review): the rest of the template header (the T2 parameter), the
// return type and the guard of the 512-bit branch are not visible in this
// listing.
315 template<
typename T1
318 operator*(
const SIMDi32<T1>& a,
const SIMDi32<T2>& b ) noexcept
// 512-bit variant (its guard is not visible here).
321 return _mm512_mullo_epi32( (~a).value, (~b).value );
323 #elif BLAZE_AVX2_MODE
// 256-bit variant.
325 return _mm256_mullo_epi32( (~a).value, (~b).value );
327 #elif BLAZE_SSE4_MODE
// 128-bit variant.
329 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of a SIMDcint32 operand (left) by a SIMDint32 operand
// (right). Implemented as a plain lane-wise low-32-bit multiply of the two
// registers.
// NOTE(review): the return type and the guard of the 512-bit branch are not
// visible in this listing.
348 operator*(
const SIMDcint32& a,
const SIMDint32& b ) noexcept
// 512-bit variant (its guard is not visible here).
351 return _mm512_mullo_epi32( (~a).value, (~b).value );
353 #elif BLAZE_AVX2_MODE
// 256-bit variant.
355 return _mm256_mullo_epi32( (~a).value, (~b).value );
357 #elif BLAZE_SSE4_MODE
// 128-bit variant.
359 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of a SIMDcuint32 operand (left) by a SIMDuint32 operand
// (right). Implemented as a plain lane-wise low-32-bit multiply; the same
// intrinsic is used as in the signed overloads.
// NOTE(review): the return type and the guard of the 512-bit branch are not
// visible in this listing.
378 operator*(
const SIMDcuint32& a,
const SIMDuint32& b ) noexcept
// 512-bit variant (its guard is not visible here).
381 return _mm512_mullo_epi32( (~a).value, (~b).value );
383 #elif BLAZE_AVX2_MODE
// 256-bit variant.
385 return _mm256_mullo_epi32( (~a).value, (~b).value );
387 #elif BLAZE_SSE4_MODE
// 128-bit variant.
389 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of a SIMDint32 operand (left) by a SIMDcint32 operand
// (right); the mirror image of the SIMDcint32 * SIMDint32 overload.
// Implemented as a plain lane-wise low-32-bit multiply.
// NOTE(review): this overload is preceded by a template header although
// neither parameter type depends on a template parameter, and the mirrored
// SIMDcint32 * SIMDint32 overload carries no such header. If the header is
// real and not an artifact of this listing, its template parameters cannot be
// deduced from a call, so the operator would never be selected for `a * b`.
// Confirm against the original header and drop the template header if so.
// NOTE(review): the return type and the guard of the 512-bit branch are not
// visible in this listing.
407 template<
typename T1
410 operator*(
const SIMDint32& a,
const SIMDcint32& b ) noexcept
// 512-bit variant (its guard is not visible here).
413 return _mm512_mullo_epi32( (~a).value, (~b).value );
415 #elif BLAZE_AVX2_MODE
// 256-bit variant.
417 return _mm256_mullo_epi32( (~a).value, (~b).value );
419 #elif BLAZE_SSE4_MODE
// 128-bit variant.
421 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of a SIMDuint32 operand (left) by a SIMDcuint32 operand
// (right); the mirror image of the SIMDcuint32 * SIMDuint32 overload.
// Implemented as a plain lane-wise low-32-bit multiply.
// NOTE(review): this overload is preceded by a template header although
// neither parameter type depends on a template parameter, and the mirrored
// SIMDcuint32 * SIMDuint32 overload carries no such header. If the header is
// real and not an artifact of this listing, its template parameters cannot be
// deduced from a call, so the operator would never be selected for `a * b`.
// Confirm against the original header and drop the template header if so.
// NOTE(review): the return type and the guard of the 512-bit branch are not
// visible in this listing.
439 template<
typename T1
442 operator*(
const SIMDuint32& a,
const SIMDcuint32& b ) noexcept
// 512-bit variant (its guard is not visible here).
445 return _mm512_mullo_epi32( (~a).value, (~b).value );
447 #elif BLAZE_AVX2_MODE
// 256-bit variant.
449 return _mm256_mullo_epi32( (~a).value, (~b).value );
451 #elif BLAZE_SSE4_MODE
// 128-bit variant.
453 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication of two 32-bit integral complex SIMD operands of the same
// type T.
// Assumes each pair of adjacent 32-bit lanes holds (real, imaginary) of one
// complex value - TODO confirm against the SIMD type definitions. Under that
// assumption each pair of the result is
//    ( ar*br - ai*bi , ar*bi + ai*br ).
// Only a 256-bit and a 128-bit variant are visible; no 512-bit variant
// appears in this listing.
// NOTE(review): the return type, the declarations of x, y and z, and the
// opening preprocessor guard are not visible in this listing.
471 template<
typename T >
473 operator*(
const SIMDci32<T>& a,
const SIMDci32<T>& b ) noexcept
// ---- 256-bit variant ----
// Sign mask: _mm256_set_epi32 takes its arguments from the highest lane down
// to the lowest, so even lanes receive -1 and odd lanes receive +1.
477 const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) );
// 0xA0 selects elements (0,0,2,2) of each 128-bit lane: duplicate the even
// lanes of a.
479 x = _mm256_shuffle_epi32( (~a).value, 0xA0 );
// z = even lanes of a times b.
480 z = _mm256_mullo_epi32( x, (~b).value );
// 0xF5 selects elements (1,1,3,3): duplicate the odd lanes of a.
481 x = _mm256_shuffle_epi32( (~a).value, 0xF5 );
// 0xB1 selects elements (1,0,3,2): swap the two lanes of every pair of b.
482 y = _mm256_shuffle_epi32( (~b).value, 0xB1 );
483 y = _mm256_mullo_epi32( x, y );
// Negate the even lanes of the second partial product.
484 y = _mm256_mullo_epi32( y, neg );
485 return _mm256_add_epi32( z, y );
487 #elif BLAZE_SSE4_MODE
// ---- 128-bit variant: same steps on a __m128i register ----
490 const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) );
// Duplicate the even lanes of a.
492 x = _mm_shuffle_epi32( (~a).value, 0xA0 );
493 z = _mm_mullo_epi32( x, (~b).value );
// Duplicate the odd lanes of a.
494 x = _mm_shuffle_epi32( (~a).value, 0xF5 );
// Swap the two lanes of every pair of b.
495 y = _mm_shuffle_epi32( (~b).value, 0xB1 );
496 y = _mm_mullo_epi32( x, y );
// Negate the even lanes of the second partial product.
497 y = _mm_mullo_epi32( y, neg );
498 return _mm_add_epi32( z, y );
// Fragment of the expression class for 32-bit floating point multiplication
// (named SIMDf32MultExpr in the listing's cross-reference text). Only the
// start of its template header and three return statements are visible.
// Each return evaluates both stored operands (a_ and b_) via eval() and
// multiplies the resulting registers lane-wise in single precision.
// NOTE(review): the class header, constructor, member declarations and the
// preprocessor guards selecting between the returns are not visible here.
521 template<
typename T1
// 512-bit variant.
550 return _mm512_mul_ps(
a_.eval().value,
b_.eval().value );
// 256-bit variant.
554 return _mm256_mul_ps(
a_.eval().value,
b_.eval().value );
// 128-bit variant.
558 return _mm_mul_ps(
a_.eval().value,
b_.eval().value );
// Multiplication operator for two 32-bit floating point SIMD operands of
// possibly different types T1 and T2.
// NOTE(review): only the signature is visible in this listing. The rest of
// the template header, the return type and the body are missing; presumably
// it builds a SIMDf32MultExpr from the two operands - confirm against the
// original header.
583 template<
typename T1
586 operator*(
const SIMDf32<T1>& a,
const SIMDf32<T2>& b ) noexcept
// Multiplication of a SIMDcfloat operand (left) by a SIMDfloat operand
// (right). Implemented as a plain lane-wise single-precision multiply of the
// two registers. Unlike the integral overloads, the operands' .value members
// are read directly, without applying operator~.
// NOTE(review): the return type and the preprocessor guards selecting
// between the three returns are not visible in this listing.
604 operator*(
const SIMDcfloat& a,
const SIMDfloat& b ) noexcept
// 512-bit variant.
607 return _mm512_mul_ps( a.value, b.value );
// 256-bit variant.
611 return _mm256_mul_ps( a.value, b.value );
// 128-bit variant.
615 return _mm_mul_ps( a.value, b.value );
// Multiplication of a SIMDfloat operand (left) by a SIMDcfloat operand
// (right); the mirror image of the SIMDcfloat * SIMDfloat overload.
// Implemented as a plain lane-wise single-precision multiply.
// NOTE(review): the return type and the preprocessor guards selecting
// between the three returns are not visible in this listing.
634 operator*(
const SIMDfloat& a,
const SIMDcfloat& b ) noexcept
// 512-bit variant.
637 return _mm512_mul_ps( a.value, b.value );
// 256-bit variant.
641 return _mm256_mul_ps( a.value, b.value );
// 128-bit variant.
645 return _mm_mul_ps( a.value, b.value );
// Multiplication of two SIMDcfloat operands (single-precision complex).
// Assumes each pair of adjacent float lanes holds (real, imaginary) of one
// complex value - TODO confirm against the SIMD type definitions. Under that
// assumption each pair of the result is
//    ( ar*br - ai*bi , ar*bi + ai*br ).
// Only a 256-bit and a 128-bit variant are visible in this listing.
// NOTE(review): the return type, the declarations of x, y and z, and the
// opening preprocessor guard are not visible in this listing.
664 operator*(
const SIMDcfloat& a,
const SIMDcfloat& b ) noexcept
// ---- 256-bit variant ----
// 0xA0 selects elements (0,0,2,2) of each 128-bit lane: duplicate the even
// lanes of a.
669 x = _mm256_shuffle_ps( a.value, a.value, 0xA0 );
// z = even lanes of a times b.
670 z = _mm256_mul_ps( x, b.value );
// 0xF5 selects elements (1,1,3,3): duplicate the odd lanes of a.
671 x = _mm256_shuffle_ps( a.value, a.value, 0xF5 );
// 0xB1 selects elements (1,0,3,2): swap the two lanes of every pair of b.
672 y = _mm256_shuffle_ps( b.value, b.value, 0xB1 );
673 y = _mm256_mul_ps( x, y );
// addsub subtracts in even lanes and adds in odd lanes, so no separate sign
// mask is needed here.
674 return _mm256_addsub_ps( z, y );
676 #elif BLAZE_SSE3_MODE
// ---- 128-bit variant: same steps on a __m128 register ----
680 x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
681 z = _mm_mul_ps( x, b.value );
682 x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
683 y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
684 y = _mm_mul_ps( x, y );
685 return _mm_addsub_ps( z, y );
// Fragment of the expression class for 64-bit floating point multiplication
// (named SIMDf64MultExpr in the listing's cross-reference text). Only the
// start of its template header and three return statements are visible.
// Each return evaluates both stored operands (a_ and b_) via eval() and
// multiplies the resulting registers lane-wise in double precision.
// NOTE(review): the class header, constructor, member declarations and the
// guards of the first two branches are not visible in this listing.
708 template<
typename T1
// 512-bit variant.
737 return _mm512_mul_pd(
a_.eval().value,
b_.eval().value );
// 256-bit variant.
741 return _mm256_mul_pd(
a_.eval().value,
b_.eval().value );
743 #elif BLAZE_SSE2_MODE
// 128-bit variant.
745 return _mm_mul_pd(
a_.eval().value,
b_.eval().value );
// Multiplication operator for two 64-bit floating point SIMD operands of
// possibly different types T1 and T2.
// NOTE(review): only the signature is visible in this listing. The rest of
// the template header, the return type and the body are missing; presumably
// it builds a SIMDf64MultExpr from the two operands - confirm against the
// original header.
770 template<
typename T1
773 operator*(
const SIMDf64<T1>& a,
const SIMDf64<T2>& b ) noexcept
// Multiplication of a SIMDcdouble operand (left) by a SIMDdouble operand
// (right). Implemented as a plain lane-wise double-precision multiply of the
// two registers; the operands' .value members are read directly.
// NOTE(review): the return type and the guards of the first two branches are
// not visible in this listing.
791 operator*(
const SIMDcdouble& a,
const SIMDdouble& b ) noexcept
// 512-bit variant.
794 return _mm512_mul_pd( a.value, b.value );
// 256-bit variant.
798 return _mm256_mul_pd( a.value, b.value );
800 #elif BLAZE_SSE2_MODE
// 128-bit variant.
802 return _mm_mul_pd( a.value, b.value );
// Multiplication of a SIMDdouble operand (left) by a SIMDcdouble operand
// (right); the mirror image of the SIMDcdouble * SIMDdouble overload.
// Implemented as a plain lane-wise double-precision multiply.
// NOTE(review): the return type and the guards of the first two branches are
// not visible in this listing.
821 operator*(
const SIMDdouble& a,
const SIMDcdouble& b ) noexcept
// 512-bit variant.
824 return _mm512_mul_pd( a.value, b.value );
// 256-bit variant.
828 return _mm256_mul_pd( a.value, b.value );
830 #elif BLAZE_SSE2_MODE
// 128-bit variant.
832 return _mm_mul_pd( a.value, b.value );
// Multiplication of two SIMDcdouble operands (double-precision complex).
// Assumes each pair of adjacent double lanes holds (real, imaginary) of one
// complex value - TODO confirm against the SIMD type definitions. Under that
// assumption each pair of the result is
//    ( ar*br - ai*bi , ar*bi + ai*br ).
// Only a 256-bit and a 128-bit variant are visible in this listing.
// NOTE(review): the return type, the declarations of x, y and z, and the
// opening preprocessor guard are not visible in this listing.
851 operator*(
const SIMDcdouble& a,
const SIMDcdouble& b ) noexcept
// ---- 256-bit variant ----
// Mask 0 picks the lower element of every 128-bit lane twice: (a0,a0,a2,a2).
856 x = _mm256_shuffle_pd( a.value, a.value, 0 );
// z = even lanes of a times b.
857 z = _mm256_mul_pd( x, b.value );
// Mask 15 picks the upper element of every lane twice: (a1,a1,a3,a3).
858 x = _mm256_shuffle_pd( a.value, a.value, 15 );
// Mask 5 swaps the two elements of every lane: (b1,b0,b3,b2).
859 y = _mm256_shuffle_pd( b.value, b.value, 5 );
860 y = _mm256_mul_pd( x, y );
// addsub subtracts in even lanes and adds in odd lanes.
861 return _mm256_addsub_pd( z, y );
863 #elif BLAZE_SSE3_MODE
// ---- 128-bit variant: one complex value per register ----
// Mask 0 gives (a0,a0).
867 x = _mm_shuffle_pd( a.value, a.value, 0 );
868 z = _mm_mul_pd( x, b.value );
// Mask 3 gives (a1,a1).
869 x = _mm_shuffle_pd( a.value, a.value, 3 );
// Mask 1 gives (b1,b0).
870 y = _mm_shuffle_pd( b.value, b.value, 1 );
871 y = _mm_mul_pd( x, y );
872 return _mm_addsub_pd( z, y );
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:7800
Expression object for 64-bit floating point multiplication operations. The SIMDf64MultExpr class repre...
Definition: Multiplication.h:710
SIMD type for 64-bit double precision floating point data values.
SIMDf32< This > BaseType
Base type of this SIMDf32MultExpr instance.
Definition: Multiplication.h:527
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:754
Header file for the basic SIMD types.
BLAZE_ALWAYS_INLINE SIMDf32MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf32MultExpr class.
Definition: Multiplication.h:536
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:566
SIMD type for 32-bit single precision floating point data values.
BLAZE_ALWAYS_INLINE SIMDf64MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf64MultExpr class.
Definition: Multiplication.h:723
System settings for the SSE mode.
Expression object for 32-bit floating point multiplication operations. The SIMDf32MultExpr class repre...
Definition: Multiplication.h:523
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:567
SIMDf64< This > BaseType
Base type of this SIMDf64MultExpr instance.
Definition: Multiplication.h:714
System settings for the inline keywords.
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:753