// Include guard of this header, followed by the multiplication operator for two 16-bit integral
// SIMD vectors of the same type T.
//
// \param a The left-hand side SIMD operand.
// \param b The right-hand side SIMD operand.
//
// Both operands are accessed through (~a).value / (~b).value and multiplied lane by lane with
// the mullo_epi16 intrinsic of the selected register width (_mm512_ under BLAZE_AVX512BW_MODE,
// otherwise the _mm256_ or _mm_ variant). Per the Intel intrinsics reference, mullo_epi16 keeps
// the low 16 bits of every lane product.
// NOTE(review): the guards of the 256-bit and 128-bit branches are not visible in this listing;
// the sibling overloads use BLAZE_AVX2_MODE and BLAZE_SSE2_MODE -- confirm.
35 #ifndef _BLAZE_MATH_SIMD_MULTIPLICATION_H_ 36 #define _BLAZE_MATH_SIMD_MULTIPLICATION_H_ 66 template<
typename T >
68 operator*(
const SIMDi16<T>& a,
const SIMDi16<T>& b ) noexcept
69 #if BLAZE_AVX512BW_MODE 71 return _mm512_mullo_epi16( (~a).value, (~b).value );
75 return _mm256_mullo_epi16( (~a).value, (~b).value );
79 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for two 16-bit integral SIMD vectors of different types
// (SIMDi16<T1> and SIMDi16<T2>).
//
// \param a The left-hand side SIMD operand.
// \param b The right-hand side SIMD operand.
//
// The registers obtained via (~a).value and (~b).value are multiplied lane by lane with the
// mullo_epi16 intrinsic of the widest enabled instruction set: AVX-512BW (512 bit), AVX2
// (256 bit) or SSE2 (128 bit). mullo_epi16 keeps the low 16 bits of every lane product.
100 operator*(
const SIMDi16<T1>& a,
const SIMDi16<T2>& b ) noexcept
101 #if BLAZE_AVX512BW_MODE 103 return _mm512_mullo_epi16( (~a).value, (~b).value );
105 #elif BLAZE_AVX2_MODE 107 return _mm256_mullo_epi16( (~a).value, (~b).value );
109 #elif BLAZE_SSE2_MODE 111 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for a SIMDcint16 vector (left) and a SIMDint16 vector (right).
//
// \param a The left-hand side SIMDcint16 operand.
// \param b The right-hand side SIMDint16 operand.
//
// The two registers are multiplied lane by lane with mullo_epi16 (AVX-512BW, AVX2 or SSE2
// variant); no cross terms between neighbouring lanes are formed.
// NOTE(review): presumably b has to hold its scalar in both lanes that line up with one complex
// element of a -- confirm against the load routines.
130 operator*(
const SIMDcint16& a,
const SIMDint16& b ) noexcept
131 #if BLAZE_AVX512BW_MODE 133 return _mm512_mullo_epi16( (~a).value, (~b).value );
135 #elif BLAZE_AVX2_MODE 137 return _mm256_mullo_epi16( (~a).value, (~b).value );
139 #elif BLAZE_SSE2_MODE 141 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for a SIMDcuint16 vector (left) and a SIMDuint16 vector (right).
//
// \param a The left-hand side SIMDcuint16 operand.
// \param b The right-hand side SIMDuint16 operand.
//
// The two registers are multiplied lane by lane with mullo_epi16 (AVX-512BW, AVX2 or SSE2
// variant); no cross terms between neighbouring lanes are formed.
// NOTE(review): presumably b has to hold its scalar in both lanes that line up with one complex
// element of a -- confirm against the load routines.
160 operator*(
const SIMDcuint16& a,
const SIMDuint16& b ) noexcept
161 #if BLAZE_AVX512BW_MODE 163 return _mm512_mullo_epi16( (~a).value, (~b).value );
165 #elif BLAZE_AVX2_MODE 167 return _mm256_mullo_epi16( (~a).value, (~b).value );
169 #elif BLAZE_SSE2_MODE 171 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for a SIMDint16 vector (left) and a SIMDcint16 vector (right); the
// mirrored counterpart of the SIMDcint16 * SIMDint16 overload.
//
// \param a The left-hand side SIMDint16 operand.
// \param b The right-hand side SIMDcint16 operand.
//
// The two registers are multiplied lane by lane with mullo_epi16 (AVX-512BW, AVX2 or SSE2
// variant); no cross terms between neighbouring lanes are formed.
190 operator*(
const SIMDint16& a,
const SIMDcint16& b ) noexcept
191 #if BLAZE_AVX512BW_MODE 193 return _mm512_mullo_epi16( (~a).value, (~b).value );
195 #elif BLAZE_AVX2_MODE 197 return _mm256_mullo_epi16( (~a).value, (~b).value );
199 #elif BLAZE_SSE2_MODE 201 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for a SIMDuint16 vector (left) and a SIMDcuint16 vector (right); the
// mirrored counterpart of the SIMDcuint16 * SIMDuint16 overload.
//
// \param a The left-hand side SIMDuint16 operand.
// \param b The right-hand side SIMDcuint16 operand.
//
// The two registers are multiplied lane by lane with mullo_epi16 (AVX-512BW, AVX2 or SSE2
// variant); no cross terms between neighbouring lanes are formed.
220 operator*(
const SIMDuint16& a,
const SIMDcuint16& b ) noexcept
221 #if BLAZE_AVX512BW_MODE 223 return _mm512_mullo_epi16( (~a).value, (~b).value );
225 #elif BLAZE_AVX2_MODE 227 return _mm256_mullo_epi16( (~a).value, (~b).value );
229 #elif BLAZE_SSE2_MODE 231 return _mm_mullo_epi16( (~a).value, (~b).value );
// Multiplication operator for two 16-bit integral complex SIMD vectors of the same type T.
//
// \param a The left-hand side SIMD operand.
// \param b The right-hand side SIMD operand.
//
// Going by the local names (a_rr, a_ii, b_ri), every complex element occupies two adjacent
// 16-bit lanes: real part in the even lane, imaginary part in the odd lane. All three branches
// compute (ar*br - ai*bi, ar*bi + ai*br) per element:
//   - shuffle mask 0b10'10'00'00 (0xA0) copies each even lane into its odd neighbour -> (ar, ar)
//   - shuffle mask 0b11'11'01'01 (0xF5) copies each odd lane into its even neighbour -> (ai, ai)
//   - shuffle mask 0b10'11'00'01 (0xB1) swaps the lanes of each pair                 -> (bi, br)
// shufflelo/shufflehi are applied in sequence because each of them only permutes one 64-bit
// half of every 128-bit lane.
// The product (ai*bi, ai*br) must be negated in the even lanes before the final addition:
//   - AVX-512BW: masked subtraction from zero; mask 0x55555555 has the bit of every even lane set.
//   - AVX2/SSE2: multiplication by 'neg'. The set_epi16 arguments are listed from the highest
//     lane down, so lane 0 receives -1 and lane 1 receives +1.
// NOTE(review): the declarations of x, y and z are not visible in this listing.
249 template<
typename T >
251 operator*(
const SIMDci16<T>& a,
const SIMDci16<T>& b ) noexcept
252 #if BLAZE_AVX512BW_MODE 254 __m512i a_ii = _mm512_shufflelo_epi16( (~a).value, 0b11
'11'01
'01 ); 255 a_ii = _mm512_shufflehi_epi16( a_ii, 0b11'11
'01'01 );
257 __m512i b_ri = _mm512_shufflelo_epi16( (~b).value, 0b10
'11'00
'01 ); 258 b_ri = _mm512_shufflehi_epi16( b_ri, 0b10'11
'00'01 );
260 __m512i a_rr = _mm512_shufflelo_epi16( (~a).value, 0b10
'10'00
'00 ); 261 a_rr = _mm512_shufflehi_epi16( a_rr, 0b10'10
'00'00 );
263 const __m512i a_rr_b = _mm512_mullo_epi16( a_rr, (~b).value );
264 const __m512i a_ii_b_ri = _mm512_mullo_epi16( a_ii, b_ri );
265 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi16( a_ii_b_ri, 0x55555555,
266 _mm512_setzero_si512(), a_ii_b_ri );
267 return _mm512_add_epi16( a_rr_b, a_ii_b_ri_signed );
269 #elif BLAZE_AVX2_MODE 272 const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
274 x = _mm256_shufflelo_epi16( (~a).value, 0xA0 );
275 x = _mm256_shufflehi_epi16( x, 0xA0 );
276 z = _mm256_mullo_epi16( x, (~b).value );
277 x = _mm256_shufflelo_epi16( (~a).value, 0xF5 );
278 x = _mm256_shufflehi_epi16( x, 0xF5 );
279 y = _mm256_shufflelo_epi16( (~b).value, 0xB1 );
280 y = _mm256_shufflehi_epi16( y, 0xB1 );
281 y = _mm256_mullo_epi16( x, y );
282 y = _mm256_mullo_epi16( y, neg );
283 return _mm256_add_epi16( z, y );
285 #elif BLAZE_SSE2_MODE 288 const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );
290 x = _mm_shufflelo_epi16( (~a).value, 0xA0 );
291 x = _mm_shufflehi_epi16( x, 0xA0 );
292 z = _mm_mullo_epi16( x, (~b).value );
293 x = _mm_shufflelo_epi16( (~a).value, 0xF5 );
294 x = _mm_shufflehi_epi16( x, 0xF5 );
295 y = _mm_shufflelo_epi16( (~b).value, 0xB1 );
296 y = _mm_shufflehi_epi16( y, 0xB1 );
297 y = _mm_mullo_epi16( x, y );
298 y = _mm_mullo_epi16( y, neg );
299 return _mm_add_epi16( z, y );
// Multiplication operator for two 32-bit integral SIMD vectors of the same type T.
//
// \param a The left-hand side SIMD operand.
// \param b The right-hand side SIMD operand.
//
// The registers obtained via (~a).value and (~b).value are multiplied lane by lane with the
// mullo_epi32 intrinsic of the widest enabled instruction set: AVX-512F or MIC (512 bit), AVX2
// (256 bit) or SSE4 (128 bit). mullo_epi32 keeps the low 32 bits of every lane product.
325 template<
typename T >
327 operator*(
const SIMDi32<T>& a,
const SIMDi32<T>& b ) noexcept
328 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 330 return _mm512_mullo_epi32( (~a).value, (~b).value );
332 #elif BLAZE_AVX2_MODE 334 return _mm256_mullo_epi32( (~a).value, (~b).value );
336 #elif BLAZE_SSE4_MODE 338 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication operator for two 32-bit integral SIMD vectors of different types
// (SIMDi32<T1> and SIMDi32<T2>).
//
// \param a The left-hand side SIMD operand.
// \param b The right-hand side SIMD operand.
//
// The registers obtained via (~a).value and (~b).value are multiplied lane by lane with the
// mullo_epi32 intrinsic of the widest enabled instruction set: AVX-512F or MIC (512 bit), AVX2
// (256 bit) or SSE4 (128 bit). mullo_epi32 keeps the low 32 bits of every lane product.
356 template<
typename T1
359 operator*(
const SIMDi32<T1>& a,
const SIMDi32<T2>& b ) noexcept
360 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 362 return _mm512_mullo_epi32( (~a).value, (~b).value );
364 #elif BLAZE_AVX2_MODE 366 return _mm256_mullo_epi32( (~a).value, (~b).value );
368 #elif BLAZE_SSE4_MODE 370 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication operator for a SIMDcint32 vector (left) and a SIMDint32 vector (right).
//
// \param a The left-hand side SIMDcint32 operand.
// \param b The right-hand side SIMDint32 operand.
//
// The two registers are multiplied lane by lane with mullo_epi32 (AVX-512F/MIC, AVX2 or SSE4
// variant); no cross terms between neighbouring lanes are formed.
// NOTE(review): presumably b has to hold its scalar in both lanes that line up with one complex
// element of a -- confirm against the load routines.
389 operator*(
const SIMDcint32& a,
const SIMDint32& b ) noexcept
390 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 392 return _mm512_mullo_epi32( (~a).value, (~b).value );
394 #elif BLAZE_AVX2_MODE 396 return _mm256_mullo_epi32( (~a).value, (~b).value );
398 #elif BLAZE_SSE4_MODE 400 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication operator for a SIMDcuint32 vector (left) and a SIMDuint32 vector (right).
//
// \param a The left-hand side SIMDcuint32 operand.
// \param b The right-hand side SIMDuint32 operand.
//
// The two registers are multiplied lane by lane with mullo_epi32 (AVX-512F/MIC, AVX2 or SSE4
// variant); no cross terms between neighbouring lanes are formed.
// NOTE(review): presumably b has to hold its scalar in both lanes that line up with one complex
// element of a -- confirm against the load routines.
419 operator*(
const SIMDcuint32& a,
const SIMDuint32& b ) noexcept
420 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 422 return _mm512_mullo_epi32( (~a).value, (~b).value );
424 #elif BLAZE_AVX2_MODE 426 return _mm256_mullo_epi32( (~a).value, (~b).value );
428 #elif BLAZE_SSE4_MODE 430 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication operator for a SIMDint32 vector (left) and a SIMDcint32 vector (right); the
// mirrored counterpart of the SIMDcint32 * SIMDint32 overload.
//
// \param a The left-hand side SIMDint32 operand.
// \param b The right-hand side SIMDcint32 operand.
//
// The two registers are multiplied lane by lane with mullo_epi32 (AVX-512F/MIC, AVX2 or SSE4
// variant); no cross terms between neighbouring lanes are formed.
//
// This overload is deliberately a plain (non-template) function: both parameter types are
// concrete, so template parameters could never be deduced from a call and an expression such
// as 'a * b' would silently skip the overload.
451 operator*(
const SIMDint32& a,
const SIMDcint32& b ) noexcept
452 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 454 return _mm512_mullo_epi32( (~a).value, (~b).value );
456 #elif BLAZE_AVX2_MODE 458 return _mm256_mullo_epi32( (~a).value, (~b).value );
460 #elif BLAZE_SSE4_MODE 462 return _mm_mullo_epi32( (~a).value, (~b).value );
// Multiplication operator for a SIMDuint32 vector (left) and a SIMDcuint32 vector (right); the
// mirrored counterpart of the SIMDcuint32 * SIMDuint32 overload.
//
// \param a The left-hand side SIMDuint32 operand.
// \param b The right-hand side SIMDcuint32 operand.
//
// The two registers are multiplied lane by lane with mullo_epi32 (AVX-512F/MIC, AVX2 or SSE4
// variant); no cross terms between neighbouring lanes are formed.
//
// This overload is deliberately a plain (non-template) function: both parameter types are
// concrete, so template parameters could never be deduced from a call and an expression such
// as 'a * b' would silently skip the overload.
483 operator*(
const SIMDuint32& a,
const SIMDcuint32& b ) noexcept
484 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 486 return _mm512_mullo_epi32( (~a).value, (~b).value );
488 #elif BLAZE_AVX2_MODE 490 return _mm256_mullo_epi32( (~a).value, (~b).value );
492 #elif BLAZE_SSE4_MODE 494 return _mm_mullo_epi32( (~a).value, (~b).value );
// This span holds, in order:
//   1. operator*( SIMDci32<T>, SIMDci32<T> )   - 32-bit integral complex product
//   2. operator*( SIMDi64<T>, SIMDi64<T> )     - 64-bit integral product, same type
//   3. operator*( SIMDi64<T1>, SIMDi64<T2> )   - 64-bit integral product, different types
//   4. operator*( SIMDcint64, SIMDint64 ) and operator*( SIMDcuint64, SIMDuint64 )
//   5. operator*( SIMDint64, SIMDcint64 ) and operator*( SIMDuint64, SIMDcuint64 )
//   6. operator*( SIMDci64<T>, SIMDci64<T> )   - 64-bit integral complex product
//
// Complex products (1 and 6): going by the local names (a_rr, a_ii, b_ri), the real part sits in
// the even element and the imaginary part in the odd element of each pair. The result per pair
// is (ar*br - ai*bi, ar*bi + ai*br). a_rr duplicates the real parts, a_ii the imaginary parts and
// b_ri swaps the two parts of b. The even elements of a_ii*b_ri are negated before the final
// addition, either by a masked subtraction from zero (AVX-512; the mask has the bit of every
// even element set) or by multiplication with the 'neg' vector (AVX2/SSE4; set_epi32 lists the
// highest lane first, so lane 0 receives -1). For the 64-bit variant the 32-bit shuffle masks
// move both 32-bit halves of a 64-bit element together.
//
// Plain and mixed products (2 to 5): lane-wise mullo_epi64 on the raw registers, only available
// under BLAZE_AVX512DQ_MODE in this listing.
//
// The mixed overloads in group 5 are deliberately plain (non-template) functions: their
// parameter types are concrete, so template parameters could never be deduced from a call and
// 'a * b' would silently skip them.
//
// NOTE(review): operator*( SIMDuint64, SIMDcuint64 ) is the only 64-bit overload guarded by
// 'BLAZE_AVX512DQ_MODE || BLAZE_MIC_MODE'; all siblings use BLAZE_AVX512DQ_MODE alone. Confirm
// whether _mm512_mullo_epi64 is really available in MIC mode before keeping that guard.
// NOTE(review): the declarations of x, y and z in the AVX2/SSE4 branches are not visible here.
512 template<
typename T >
514 operator*(
const SIMDci32<T>& a,
const SIMDci32<T>& b ) noexcept
515 #if BLAZE_AVX512F_MODE 517 const __m512i a_ii = _mm512_shuffle_epi32( (~a).value, 0b11
'11'01
'01 ); 518 const __m512i b_ri = _mm512_shuffle_epi32( (~b).value, 0b10'11
'00'01 );
519 const __m512i a_rr = _mm512_shuffle_epi32( (~a).value, 0b10
'10'00
'00 ); 521 const __m512i a_rr_b = _mm512_mullo_epi32( a_rr, (~b).value ); 522 const __m512i a_ii_b_ri = _mm512_mullo_epi32( a_ii, b_ri ); 523 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi32( a_ii_b_ri, 0b0101010101010101, 524 _mm512_setzero_si512(), a_ii_b_ri ); 525 return _mm512_add_epi32( a_rr_b, a_ii_b_ri_signed ); 527 #elif BLAZE_AVX2_MODE 530 const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) ); 532 x = _mm256_shuffle_epi32( (~a).value, 0xA0 ); 533 z = _mm256_mullo_epi32( x, (~b).value ); 534 x = _mm256_shuffle_epi32( (~a).value, 0xF5 ); 535 y = _mm256_shuffle_epi32( (~b).value, 0xB1 ); 536 y = _mm256_mullo_epi32( x, y ); 537 y = _mm256_mullo_epi32( y, neg ); 538 return _mm256_add_epi32( z, y ); 540 #elif BLAZE_SSE4_MODE 543 const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) ); 545 x = _mm_shuffle_epi32( (~a).value, 0xA0 ); 546 z = _mm_mullo_epi32( x, (~b).value ); 547 x = _mm_shuffle_epi32( (~a).value, 0xF5 ); 548 y = _mm_shuffle_epi32( (~b).value, 0xB1 ); 549 y = _mm_mullo_epi32( x, y ); 550 y = _mm_mullo_epi32( y, neg ); 551 return _mm_add_epi32( z, y ); 556 //************************************************************************************************* 561 //================================================================================================= 563 // 64-BIT INTEGRAL SIMD TYPES 565 //================================================================================================= 567 //************************************************************************************************* 577 template< typename T > // Type of both operands 578 BLAZE_ALWAYS_INLINE const T 579 operator*( const SIMDi64<T>& a, const SIMDi64<T>& b ) noexcept 580 #if BLAZE_AVX512DQ_MODE 582 return _mm512_mullo_epi64( (~a).value, (~b).value ); 587 //************************************************************************************************* 590 //************************************************************************************************* 600 
template< typename T1 // Type of the left-hand side operand 601 , typename T2 > // Type of the right-hand side operand 602 BLAZE_ALWAYS_INLINE const SIMDuint64 603 operator*( const SIMDi64<T1>& a, const SIMDi64<T2>& b ) noexcept 604 #if BLAZE_AVX512DQ_MODE 606 return _mm512_mullo_epi64( (~a).value, (~b).value ); 611 //************************************************************************************************* 614 //************************************************************************************************* 624 BLAZE_ALWAYS_INLINE const SIMDcint64 625 operator*( const SIMDcint64& a, const SIMDint64& b ) noexcept 626 #if BLAZE_AVX512DQ_MODE 628 return _mm512_mullo_epi64( (~a).value, (~b).value ); 633 //************************************************************************************************* 636 //************************************************************************************************* 646 BLAZE_ALWAYS_INLINE const SIMDcuint64 647 operator*( const SIMDcuint64& a, const SIMDuint64& b ) noexcept 648 #if BLAZE_AVX512DQ_MODE 650 return _mm512_mullo_epi64( (~a).value, (~b).value ); 655 //************************************************************************************************* 658 //************************************************************************************************* 670 BLAZE_ALWAYS_INLINE const SIMDcint64 671 operator*( const SIMDint64& a, const SIMDcint64& b ) noexcept 672 #if BLAZE_AVX512DQ_MODE 674 return _mm512_mullo_epi64( (~a).value, (~b).value ); 679 //************************************************************************************************* 682 //************************************************************************************************* 694 BLAZE_ALWAYS_INLINE 
const SIMDcuint64 695 operator*( const SIMDuint64& a, const SIMDcuint64& b ) noexcept 696 #if BLAZE_AVX512DQ_MODE || BLAZE_MIC_MODE 698 return _mm512_mullo_epi64( (~a).value, (~b).value ); 703 //************************************************************************************************* 706 //************************************************************************************************* 716 template< typename T > // Type of both operands 717 BLAZE_ALWAYS_INLINE const T 718 operator*( const SIMDci64<T>& a, const SIMDci64<T>& b ) noexcept 719 #if BLAZE_AVX512DQ_MODE 721 const __m512i a_ii = _mm512_shuffle_epi32( (~a).value, 0b11'10
'11'10 );
722 const __m512i b_ri = _mm512_shuffle_epi32( (~b).value, 0b01
'00'11
'10 ); 723 const __m512i a_rr = _mm512_shuffle_epi32( (~a).value, 0b01'00
'01'00 );
725 const __m512i a_rr_b = _mm512_mullo_epi64( a_rr, (~b).value );
726 const __m512i a_ii_b_ri = _mm512_mullo_epi64( a_ii, b_ri );
727 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi64( a_ii_b_ri, 0b01010101,
728 _mm512_setzero_si512(), a_ii_b_ri );
729 return _mm512_add_epi64( a_rr_b, a_ii_b_ri_signed );
// Fragment of the expression type SIMDf32MultExpr<T1,T2>, which derives from
// SIMDf32< SIMDf32MultExpr<T1,T2> >.
//
// The visible statements are the evaluation of the expression: both stored operands (a_ and b_)
// are evaluated via eval(), and their registers are multiplied lane by lane with mul_ps at the
// selected register width (_mm512_ under BLAZE_AVX512F_MODE or BLAZE_MIC_MODE, otherwise the
// _mm256_ or _mm_ variant).
// NOTE(review): the struct head, the constructor, the eval() signature, the member declarations
// and the guards of the 256-bit and 128-bit branches are not visible in this listing.
752 template<
typename T1
755 :
public SIMDf32< SIMDf32MultExpr<T1,T2> >
780 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 782 return _mm512_mul_ps(
a_.eval().value,
b_.eval().value );
786 return _mm256_mul_ps(
a_.eval().value,
b_.eval().value );
790 return _mm_mul_ps(
a_.eval().value,
b_.eval().value );
815 template<
typename T1
818 operator*(
const SIMDf32<T1>& a,
const SIMDf32<T2>& b ) noexcept
837 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 839 return _mm512_mul_ps( a.value, b.value );
843 return _mm256_mul_ps( a.value, b.value );
847 return _mm_mul_ps( a.value, b.value );
867 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 869 return _mm512_mul_ps( a.value, b.value );
873 return _mm256_mul_ps( a.value, b.value );
877 return _mm_mul_ps( a.value, b.value );
897 #if BLAZE_AVX512F_MODE 899 const __m512 a_ii = _mm512_permute_ps( a.value, 0b11
'11'01
'01 ); 900 const __m512 b_ri = _mm512_permute_ps( b.value, 0b10'11
'00'01 );
901 const __m512 a_rr = _mm512_permute_ps( a.value, 0b10
'10'00
'00 ); 902 return _mm512_fmaddsub_ps( a_rr, b.value, _mm512_mul_ps( a_ii, b_ri ) ); 908 x = _mm256_shuffle_ps( a.value, a.value, 0xA0 ); 909 z = _mm256_mul_ps( x, b.value ); 910 x = _mm256_shuffle_ps( a.value, a.value, 0xF5 ); 911 y = _mm256_shuffle_ps( b.value, b.value, 0xB1 ); 912 y = _mm256_mul_ps( x, y ); 913 return _mm256_addsub_ps( z, y ); 915 #elif BLAZE_SSE3_MODE 919 x = _mm_shuffle_ps( a.value, a.value, 0xA0 ); 920 z = _mm_mul_ps( x, b.value ); 921 x = _mm_shuffle_ps( a.value, a.value, 0xF5 ); 922 y = _mm_shuffle_ps( b.value, b.value, 0xB1 ); 923 y = _mm_mul_ps( x, y ); 924 return _mm_addsub_ps( z, y ); 929 //************************************************************************************************* 934 //================================================================================================= 936 // 64-BIT FLOATING POINT SIMD TYPES 938 //================================================================================================= 940 //************************************************************************************************* 947 template< typename T1 // Type of the left-hand side operand 948 , typename T2 > // Type of the right-hand side operand 949 struct SIMDf64MultExpr 950 : public SIMDf64< SIMDf64MultExpr<T1,T2> > 952 //**Type definitions**************************************************************************** 953 using This = SIMDf64MultExpr<T1,T2>; //!< Type of this SIMDf64MultExpr instance. 954 using BaseType = SIMDf64<This>; //!< Base type of this SIMDf64MultExpr instance. 
955 //********************************************************************************************** 957 //**Constructor********************************************************************************* 963 explicit BLAZE_ALWAYS_INLINE SIMDf64MultExpr( const T1& a, const T2& b ) 964 : a_( a ) // The left-hand side operand for the multiplication 965 , b_( b ) // The right-hand side operand for the multiplication 967 //********************************************************************************************** 969 //**Evaluation function************************************************************************* 974 BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept 975 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 977 return _mm512_mul_pd( a_.eval().value, b_.eval().value ); 981 return _mm256_mul_pd( a_.eval().value, b_.eval().value ); 983 #elif BLAZE_SSE2_MODE 985 return _mm_mul_pd( a_.eval().value, b_.eval().value ); 990 //********************************************************************************************** 992 //**Member variables**************************************************************************** 993 const T1 a_; //!< The left-hand side operand for the multiplication. 994 const T2 b_; //!< The right-hand side operand for the multiplication. 
995 //********************************************************************************************** 997 //************************************************************************************************* 1000 //************************************************************************************************* 1010 template< typename T1 // Type of the left-hand side operand 1011 , typename T2 > // Type of the right-hand side operand 1012 BLAZE_ALWAYS_INLINE const SIMDf64MultExpr<T1,T2> 1013 operator*( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept 1015 return SIMDf64MultExpr<T1,T2>( ~a, ~b ); 1017 //************************************************************************************************* 1020 //************************************************************************************************* 1030 BLAZE_ALWAYS_INLINE const SIMDcdouble 1031 operator*( const SIMDcdouble& a, const SIMDdouble& b ) noexcept 1032 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 1034 return _mm512_mul_pd( a.value, b.value ); 1036 #elif BLAZE_AVX_MODE 1038 return _mm256_mul_pd( a.value, b.value ); 1040 #elif BLAZE_SSE2_MODE 1042 return _mm_mul_pd( a.value, b.value ); 1047 //************************************************************************************************* 1050 //************************************************************************************************* 1060 BLAZE_ALWAYS_INLINE const SIMDcdouble 1061 operator*( const SIMDdouble& a, const SIMDcdouble& b ) noexcept 1062 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 1064 return _mm512_mul_pd( a.value, b.value ); 1066 #elif BLAZE_AVX_MODE 1068 return _mm256_mul_pd( a.value, b.value ); 1070 #elif BLAZE_SSE2_MODE 1072 return _mm_mul_pd( a.value, b.value ); 1077 //************************************************************************************************* 1080 //************************************************************************************************* 1090 BLAZE_ALWAYS_INLINE const SIMDcdouble 1091 operator*( const 
SIMDcdouble& a, const SIMDcdouble& b ) noexcept 1092 #if BLAZE_AVX512F_MODE 1094 const __m512d a_ii = _mm512_permute_pd( a.value, 0b1'1
'1'1
'1'1
'1'1 );
1095 const __m512d b_ri = _mm512_permute_pd( b.value, 0b0
'1'0
'1'0
'1'0
'1 ); 1096 const __m512d a_rr = _mm512_permute_pd( a.value, 0 ); 1097 return _mm512_fmaddsub_pd( a_rr, b.value, _mm512_mul_pd( a_ii, b_ri ) ); 1099 #elif BLAZE_AVX_MODE 1103 x = _mm256_shuffle_pd( a.value, a.value, 0 ); 1104 z = _mm256_mul_pd( x, b.value ); 1105 x = _mm256_shuffle_pd( a.value, a.value, 15 ); 1106 y = _mm256_shuffle_pd( b.value, b.value, 5 ); 1107 y = _mm256_mul_pd( x, y ); 1108 return _mm256_addsub_pd( z, y ); 1110 #elif BLAZE_SSE3_MODE 1114 x = _mm_shuffle_pd( a.value, a.value, 0 ); 1115 z = _mm_mul_pd( x, b.value ); 1116 x = _mm_shuffle_pd( a.value, a.value, 3 ); 1117 y = _mm_shuffle_pd( b.value, b.value, 1 ); 1118 y = _mm_mul_pd( x, y ); 1119 return _mm_addsub_pd( z, y ); 1124 //************************************************************************************************* 1126 } // namespace blaze SIMDf32< This > BaseType
Base type of this SIMDf32MultExpr instance.
Definition: Multiplication.h:759
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
decltype(auto) operator*(const DenseMatrix< MT1, false > &lhs, const DenseMatrix< MT2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( ).
Definition: DMatDMatMultExpr.h:8893
Header file for the basic SIMD types.
BLAZE_ALWAYS_INLINE SIMDf32MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf32MultExpr class.
Definition: Multiplication.h:768
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:798
SIMD type for 32-bit single precision complex values.
SIMD type for 32-bit single precision floating point data values.
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
System settings for the SSE mode.
Expression object for 32-bit floating point multiplication operations. The SIMDf32MultExpr class repre...
Definition: Multiplication.h:754
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:799
System settings for the inline keywords.