#ifndef _BLAZE_MATH_SIMD_MULT_H_
#define _BLAZE_MATH_SIMD_MULT_H_
// Multiplication of two vectors of 16-bit integral SIMD values of the same type.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDi16<T>& a, const SIMDi16<T>& b ) noexcept
#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }
#else
= delete;
#endif
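// Note: the mullo intrinsics used above keep only the low 16 bits of each 32-bit
// intermediate product, i.e. the element-wise result matches ordinary 16-bit integer
// multiplication with wrap-around. A minimal standalone sketch of the SSE2 branch
// (illustrative only, with hypothetical test values; not part of this header):
//
//    #include <emmintrin.h>   // SSE2
//    #include <cstdint>
//    #include <cstdio>
//
//    int main()
//    {
//       const __m128i a = _mm_set_epi16( 8, 7, 6, 5, 4, 3, 2, 1 );
//       const __m128i b = _mm_set_epi16( 80, 70, 60, 50, 40, 30, 20, 300 );
//
//       // Element-wise product, keeping only the low 16 bits of each result
//       const __m128i c = _mm_mullo_epi16( a, b );
//
//       alignas(16) int16_t out[8];
//       _mm_store_si128( reinterpret_cast<__m128i*>( out ), c );
//       for( int i=0; i<8; ++i )
//          std::printf( "%d ", out[i] );   // prints: 300 40 90 160 250 360 490 640
//       std::printf( "\n" );
//    }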
// Multiplication of two vectors of 16-bit integral SIMD values of different types.
template< typename T1, typename T2 >
BLAZE_ALWAYS_INLINE const SIMDuint16
   operator*( const SIMDi16<T1>& a, const SIMDi16<T2>& b ) noexcept
#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }
#else
= delete;
#endif
#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }

#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }

#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }

#if BLAZE_AVX512BW_MODE
{ return _mm512_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi16( (*a).value, (*b).value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mullo_epi16( (*a).value, (*b).value ); }
// Multiplication of two vectors of 16-bit integral complex SIMD values.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDci16<T>& a, const SIMDci16<T>& b ) noexcept
#if BLAZE_AVX512BW_MODE
{
   // Duplicate the imaginary parts of 'a'
   __m512i a_ii = _mm512_shufflelo_epi16( (*a).value, 0b11'11'01'01 );
   a_ii = _mm512_shufflehi_epi16( a_ii, 0b11'11'01'01 );

   // Swap the real and imaginary parts of 'b'
   __m512i b_ri = _mm512_shufflelo_epi16( (*b).value, 0b10'11'00'01 );
   b_ri = _mm512_shufflehi_epi16( b_ri, 0b10'11'00'01 );

   // Duplicate the real parts of 'a'
   __m512i a_rr = _mm512_shufflelo_epi16( (*a).value, 0b10'10'00'00 );
   a_rr = _mm512_shufflehi_epi16( a_rr, 0b10'10'00'00 );

   const __m512i a_rr_b    = _mm512_mullo_epi16( a_rr, (*b).value );
   const __m512i a_ii_b_ri = _mm512_mullo_epi16( a_ii, b_ri );

   // Negate the even (real-part) lanes of a_ii*b_ri before the final addition
   const __m512i a_ii_b_ri_signed =
      _mm512_mask_sub_epi16( a_ii_b_ri, 0x55555555, _mm512_setzero_si512(), a_ii_b_ri );
   return _mm512_add_epi16( a_rr_b, a_ii_b_ri_signed );
}
#elif BLAZE_AVX2_MODE
{
   __m256i x, y, z;
   const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );

   x = _mm256_shufflelo_epi16( (*a).value, 0xA0 );  // duplicate the real parts of 'a'
   x = _mm256_shufflehi_epi16( x, 0xA0 );
   z = _mm256_mullo_epi16( x, (*b).value );
   x = _mm256_shufflelo_epi16( (*a).value, 0xF5 );  // duplicate the imaginary parts of 'a'
   x = _mm256_shufflehi_epi16( x, 0xF5 );
   y = _mm256_shufflelo_epi16( (*b).value, 0xB1 );  // swap the real/imaginary parts of 'b'
   y = _mm256_shufflehi_epi16( y, 0xB1 );
   y = _mm256_mullo_epi16( x, y );
   y = _mm256_mullo_epi16( y, neg );                // negate the even (real-part) lanes
   return _mm256_add_epi16( z, y );
}
#elif BLAZE_SSE2_MODE
{
   __m128i x, y, z;
   const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );

   x = _mm_shufflelo_epi16( (*a).value, 0xA0 );
   x = _mm_shufflehi_epi16( x, 0xA0 );
   z = _mm_mullo_epi16( x, (*b).value );
   x = _mm_shufflelo_epi16( (*a).value, 0xF5 );
   x = _mm_shufflehi_epi16( x, 0xF5 );
   y = _mm_shufflelo_epi16( (*b).value, 0xB1 );
   y = _mm_shufflehi_epi16( y, 0xB1 );
   y = _mm_mullo_epi16( x, y );
   y = _mm_mullo_epi16( y, neg );
   return _mm_add_epi16( z, y );
}
#else
= delete;
#endif
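// All of the complex multiplication kernels in this header follow the same lane-wise
// recipe: duplicate the real parts of 'a' and multiply by 'b', duplicate the imaginary
// parts of 'a' and multiply by a real/imaginary-swapped 'b', then add the two partial
// products with the sign flipped in the real lanes. A scalar reference sketch of that
// recipe for a single 16-bit complex element (illustrative only; 'Complex16' is a
// hypothetical stand-in for one lane pair, not a Blaze type):
//
//    #include <cstdint>
//
//    struct Complex16 { int16_t re, im; };
//
//    Complex16 complexMultiply( Complex16 a, Complex16 b )
//    {
//       // a_rr * b    -> ( a.re*b.re, a.re*b.im )
//       const int16_t z_re = static_cast<int16_t>( a.re * b.re );
//       const int16_t z_im = static_cast<int16_t>( a.re * b.im );
//       // a_ii * b_ri -> ( a.im*b.im, a.im*b.re )
//       const int16_t y_re = static_cast<int16_t>( a.im * b.im );
//       const int16_t y_im = static_cast<int16_t>( a.im * b.re );
//       // Add with the sign flipped in the real lane:
//       // ( a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re )
//       return Complex16{ static_cast<int16_t>( z_re - y_re ),
//                         static_cast<int16_t>( z_im + y_im ) };
//    }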
// Multiplication of two vectors of 32-bit integral SIMD values of the same type.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDi32<T>& a, const SIMDi32<T>& b ) noexcept
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }
#else
= delete;
#endif
// Multiplication of two vectors of 32-bit integral SIMD values of different types.
template< typename T1, typename T2 >
BLAZE_ALWAYS_INLINE const SIMDuint32
   operator*( const SIMDi32<T1>& a, const SIMDi32<T2>& b ) noexcept
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }
#else
= delete;
#endif
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }

#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }

#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }

#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_AVX2_MODE
{ return _mm256_mullo_epi32( (*a).value, (*b).value ); }
#elif BLAZE_SSE4_MODE
{ return _mm_mullo_epi32( (*a).value, (*b).value ); }
// Multiplication of two vectors of 32-bit integral complex SIMD values.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDci32<T>& a, const SIMDci32<T>& b ) noexcept
#if BLAZE_AVX512F_MODE
{
   const __m512i a_ii = _mm512_shuffle_epi32( (*a).value, _MM_PERM_DDBB );  // duplicate the imaginary parts of 'a'
   const __m512i b_ri = _mm512_shuffle_epi32( (*b).value, _MM_PERM_CDAB );  // swap the real/imaginary parts of 'b'
   const __m512i a_rr = _mm512_shuffle_epi32( (*a).value, _MM_PERM_CCAA );  // duplicate the real parts of 'a'

   const __m512i a_rr_b    = _mm512_mullo_epi32( a_rr, (*b).value );
   const __m512i a_ii_b_ri = _mm512_mullo_epi32( a_ii, b_ri );

   // Negate the even (real-part) lanes of a_ii*b_ri before the final addition
   const __m512i a_ii_b_ri_signed =
      _mm512_mask_sub_epi32( a_ii_b_ri, 0b0101010101010101, _mm512_setzero_si512(), a_ii_b_ri );
   return _mm512_add_epi32( a_rr_b, a_ii_b_ri_signed );
}
#elif BLAZE_AVX2_MODE
{
   __m256i x, y, z;
   const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) );

   x = _mm256_shuffle_epi32( (*a).value, 0xA0 );  // duplicate the real parts of 'a'
   z = _mm256_mullo_epi32( x, (*b).value );
   x = _mm256_shuffle_epi32( (*a).value, 0xF5 );  // duplicate the imaginary parts of 'a'
   y = _mm256_shuffle_epi32( (*b).value, 0xB1 );  // swap the real/imaginary parts of 'b'
   y = _mm256_mullo_epi32( x, y );
   y = _mm256_mullo_epi32( y, neg );              // negate the even (real-part) lanes
   return _mm256_add_epi32( z, y );
}
#elif BLAZE_SSE4_MODE
{
   __m128i x, y, z;
   const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) );

   x = _mm_shuffle_epi32( (*a).value, 0xA0 );
   z = _mm_mullo_epi32( x, (*b).value );
   x = _mm_shuffle_epi32( (*a).value, 0xF5 );
   y = _mm_shuffle_epi32( (*b).value, 0xB1 );
   y = _mm_mullo_epi32( x, y );
   y = _mm_mullo_epi32( y, neg );
   return _mm_add_epi32( z, y );
}
#else
= delete;
#endif
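// On the AVX-512 paths the sign flip is done with a masked subtraction instead of a
// multiplication by a (+1,-1,+1,-1,...) constant: lanes selected by the 0b0101... mask
// (the even, real-part lanes) are replaced by 0 - x, all other lanes pass through
// unchanged. A scalar model of the call _mm512_mask_sub_epi32( x, mask, zero, x ) used
// above (illustrative only; 'maskSubFromZero' is a hypothetical helper, not an intrinsic):
//
//    #include <array>
//    #include <cstddef>
//    #include <cstdint>
//
//    template< std::size_t N >
//    std::array<int32_t,N> maskSubFromZero( const std::array<int32_t,N>& x, uint32_t mask )
//    {
//       std::array<int32_t,N> result{};
//       for( std::size_t i=0UL; i<N; ++i )
//          result[i] = ( ( mask >> i ) & 1u ) ? -x[i] : x[i];   // masked lane: 0 - x[i]
//       return result;
//    }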
// Multiplication of two vectors of 64-bit integral SIMD values of the same type.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDi64<T>& a, const SIMDi64<T>& b ) noexcept
#if BLAZE_AVX512DQ_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }
#else
= delete;
#endif
// Multiplication of two vectors of 64-bit integral SIMD values of different types.
template< typename T1, typename T2 >
BLAZE_ALWAYS_INLINE const SIMDuint64
   operator*( const SIMDi64<T1>& a, const SIMDi64<T2>& b ) noexcept
#if BLAZE_AVX512DQ_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }
#else
= delete;
#endif
#if BLAZE_AVX512DQ_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }

#if BLAZE_AVX512DQ_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }

#if BLAZE_AVX512DQ_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }

#if BLAZE_AVX512DQ_MODE || BLAZE_MIC_MODE
{ return _mm512_mullo_epi64( (*a).value, (*b).value ); }
// Multiplication of two vectors of 64-bit integral complex SIMD values.
template< typename T >
BLAZE_ALWAYS_INLINE const T
   operator*( const SIMDci64<T>& a, const SIMDci64<T>& b ) noexcept
#if BLAZE_AVX512DQ_MODE
{
   const __m512i a_ii = _mm512_shuffle_epi32( (*a).value, 0b11'10'11'10 );  // duplicate the imaginary parts of 'a'
   const __m512i b_ri = _mm512_shuffle_epi32( (*b).value, 0b01'00'11'10 );  // swap the real/imaginary parts of 'b'
   const __m512i a_rr = _mm512_shuffle_epi32( (*a).value, 0b01'00'01'00 );  // duplicate the real parts of 'a'

   const __m512i a_rr_b    = _mm512_mullo_epi64( a_rr, (*b).value );
   const __m512i a_ii_b_ri = _mm512_mullo_epi64( a_ii, b_ri );

   // Negate the even (real-part) lanes of a_ii*b_ri before the final addition
   const __m512i a_ii_b_ri_signed =
      _mm512_mask_sub_epi64( a_ii_b_ri, 0b01010101, _mm512_setzero_si512(), a_ii_b_ri );
   return _mm512_add_epi64( a_rr_b, a_ii_b_ri_signed );
}
#else
= delete;
#endif
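// The 64-bit complex kernel above reuses the 32-bit shuffle: each 64-bit real or
// imaginary part occupies two adjacent 32-bit elements, so an immediate that moves the
// 32-bit elements in pairs moves whole 64-bit halves. A scalar model of a 32-bit shuffle
// within one 128-bit lane (illustrative only, hypothetical helper):
//
//    #include <array>
//    #include <cstdint>
//
//    // dst[i] = src[ (imm >> 2*i) & 3 ] for the four 32-bit elements of one 128-bit lane
//    std::array<uint32_t,4> shuffleEpi32Lane( const std::array<uint32_t,4>& src, unsigned imm )
//    {
//       std::array<uint32_t,4> dst{};
//       for( unsigned i=0u; i<4u; ++i )
//          dst[i] = src[ ( imm >> ( 2u*i ) ) & 3u ];
//       return dst;
//    }
//
//    // With imm = 0b01'00'11'10 the mapping is dst = { src[2], src[3], src[0], src[1] }:
//    // the upper half (elements 3:2, the imaginary part) and the lower half (elements 1:0,
//    // the real part) of each 128-bit lane trade places, i.e. 'b_ri' holds 'b' with real
//    // and imaginary parts swapped.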
// Expression object for 32-bit floating point multiplication operations.
template< typename T1, typename T2 >
struct SIMDf32MultExpr
   : public SIMDf32< SIMDf32MultExpr<T1,T2> >
{
   // Evaluation of the expression object.
   BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
   { return _mm512_mul_ps( a_.eval().value, b_.eval().value ); }
#elif BLAZE_AVX_MODE
   { return _mm256_mul_ps( a_.eval().value, b_.eval().value ); }
#elif BLAZE_SSE_MODE
   { return _mm_mul_ps( a_.eval().value, b_.eval().value ); }
#else
   = delete;
#endif

   const T1 a_;  //!< The left-hand side operand for the multiplication.
   const T2 b_;  //!< The right-hand side operand for the multiplication.
};
// Multiplication of two vectors of single precision floating point SIMD values.
template< typename T1, typename T2 >
BLAZE_ALWAYS_INLINE const SIMDf32MultExpr<T1,T2>
   operator*( const SIMDf32<T1>& a, const SIMDf32<T2>& b ) noexcept
{
   return SIMDf32MultExpr<T1,T2>( *a, *b );
}
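// The multiplication of two SIMDf32 operands is not evaluated immediately: it returns a
// lightweight SIMDf32MultExpr that performs the actual mul_ps only when eval() is called.
// This lets downstream code that sees the unevaluated product fuse it with a subsequent
// addition into a single fused multiply-add. A simplified sketch of the idea with
// hypothetical types (not the Blaze classes; assumes SSE and, optionally, FMA):
//
//    #include <immintrin.h>
//
//    struct Vec4f { __m128 value; };
//
//    struct Vec4fMulExpr
//    {
//       Vec4f a, b;
//       Vec4f eval() const { return Vec4f{ _mm_mul_ps( a.value, b.value ) }; }
//    };
//
//    inline Vec4fMulExpr operator*( Vec4f a, Vec4f b ) { return Vec4fMulExpr{ a, b }; }
//
//    // A consumer of the unevaluated product can emit a fused multiply-add instead of
//    // a separate multiplication and addition.
//    inline Vec4f operator+( Vec4fMulExpr ab, Vec4f c )
//    {
//    #if defined(__FMA__)
//       return Vec4f{ _mm_fmadd_ps( ab.a.value, ab.b.value, c.value ) };
//    #else
//       return Vec4f{ _mm_add_ps( ab.eval().value, c.value ) };
//    #endif
//    }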
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mul_ps( a.value, b.value ); }
#elif BLAZE_AVX_MODE
{ return _mm256_mul_ps( a.value, b.value ); }
#elif BLAZE_SSE_MODE
{ return _mm_mul_ps( a.value, b.value ); }

#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mul_ps( a.value, b.value ); }
#elif BLAZE_AVX_MODE
{ return _mm256_mul_ps( a.value, b.value ); }
#elif BLAZE_SSE_MODE
{ return _mm_mul_ps( a.value, b.value ); }
// Multiplication of two vectors of single precision complex SIMD values.
BLAZE_ALWAYS_INLINE const SIMDcfloat
   operator*( const SIMDcfloat& a, const SIMDcfloat& b ) noexcept
#if BLAZE_AVX512F_MODE
{
   const __m512 a_ii = _mm512_permute_ps( a.value, 0b11'11'01'01 );  // duplicate the imaginary parts of 'a'
   const __m512 b_ri = _mm512_permute_ps( b.value, 0b10'11'00'01 );  // swap the real/imaginary parts of 'b'
   const __m512 a_rr = _mm512_permute_ps( a.value, 0b10'10'00'00 );  // duplicate the real parts of 'a'
   return _mm512_fmaddsub_ps( a_rr, b.value, _mm512_mul_ps( a_ii, b_ri ) );
}
#elif BLAZE_AVX_MODE
{
   __m256 x, y, z;
   x = _mm256_shuffle_ps( a.value, a.value, 0xA0 );
   z = _mm256_mul_ps( x, b.value );
   x = _mm256_shuffle_ps( a.value, a.value, 0xF5 );
   y = _mm256_shuffle_ps( b.value, b.value, 0xB1 );
   y = _mm256_mul_ps( x, y );
   return _mm256_addsub_ps( z, y );
}
#elif BLAZE_SSE3_MODE
{
   __m128 x, y, z;
   x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
   z = _mm_mul_ps( x, b.value );
   x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
   y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
   y = _mm_mul_ps( x, y );
   return _mm_addsub_ps( z, y );
}
#else
= delete;
#endif
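// The floating point kernels need no explicit sign-flip vector: addsub subtracts in the
// even (real) lanes and adds in the odd (imaginary) lanes, which is exactly the sign
// pattern of a complex product, and fmaddsub additionally fuses the first multiplication
// into the same instruction. A scalar model of _mm_addsub_ps( z, y ) (illustrative only,
// hypothetical helper):
//
//    #include <array>
//    #include <cstddef>
//
//    template< std::size_t N >
//    std::array<float,N> addsub( const std::array<float,N>& z, const std::array<float,N>& y )
//    {
//       std::array<float,N> result{};
//       for( std::size_t i=0UL; i<N; ++i )
//          result[i] = ( i % 2UL == 0UL ) ? ( z[i] - y[i] ) : ( z[i] + y[i] );
//       return result;
//    }
//
//    // _mm512_fmaddsub_ps( a_rr, b, c ) behaves like addsub( a_rr*b, c ) with the
//    // element-wise multiplication fused into the same instruction.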
// Expression object for 64-bit floating point multiplication operations.
template< typename T1, typename T2 >
struct SIMDf64MultExpr
   : public SIMDf64< SIMDf64MultExpr<T1,T2> >
{
   // Evaluation of the expression object.
   BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
   { return _mm512_mul_pd( a_.eval().value, b_.eval().value ); }
#elif BLAZE_AVX_MODE
   { return _mm256_mul_pd( a_.eval().value, b_.eval().value ); }
#elif BLAZE_SSE2_MODE
   { return _mm_mul_pd( a_.eval().value, b_.eval().value ); }
#else
   = delete;
#endif

   const T1 a_;  //!< The left-hand side operand for the multiplication.
   const T2 b_;  //!< The right-hand side operand for the multiplication.
};
// Multiplication of two vectors of double precision floating point SIMD values.
template< typename T1, typename T2 >
BLAZE_ALWAYS_INLINE const SIMDf64MultExpr<T1,T2>
   operator*( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept
{
   return SIMDf64MultExpr<T1,T2>( *a, *b );
}
#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mul_pd( a.value, b.value ); }
#elif BLAZE_AVX_MODE
{ return _mm256_mul_pd( a.value, b.value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mul_pd( a.value, b.value ); }

#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
{ return _mm512_mul_pd( a.value, b.value ); }
#elif BLAZE_AVX_MODE
{ return _mm256_mul_pd( a.value, b.value ); }
#elif BLAZE_SSE2_MODE
{ return _mm_mul_pd( a.value, b.value ); }
// Multiplication of two vectors of double precision complex SIMD values.
BLAZE_ALWAYS_INLINE const SIMDcdouble
   operator*( const SIMDcdouble& a, const SIMDcdouble& b ) noexcept
#if BLAZE_AVX512F_MODE
{
   const __m512d a_ii = _mm512_permute_pd( a.value, 0b1'1'1'1'1'1'1'1 );  // duplicate the imaginary parts of 'a'
   const __m512d b_ri = _mm512_permute_pd( b.value, 0b0'1'0'1'0'1'0'1 );  // swap the real/imaginary parts of 'b'
   const __m512d a_rr = _mm512_permute_pd( a.value, 0 );                  // duplicate the real parts of 'a'
   return _mm512_fmaddsub_pd( a_rr, b.value, _mm512_mul_pd( a_ii, b_ri ) );
}
#elif BLAZE_AVX_MODE
{
   __m256d x, y, z;
   x = _mm256_shuffle_pd( a.value, a.value, 0 );
   z = _mm256_mul_pd( x, b.value );
   x = _mm256_shuffle_pd( a.value, a.value, 15 );
   y = _mm256_shuffle_pd( b.value, b.value, 5 );
   y = _mm256_mul_pd( x, y );
   return _mm256_addsub_pd( z, y );
}
#elif BLAZE_SSE3_MODE
{
   __m128d x, y, z;
   x = _mm_shuffle_pd( a.value, a.value, 0 );
   z = _mm_mul_pd( x, b.value );
   x = _mm_shuffle_pd( a.value, a.value, 3 );
   y = _mm_shuffle_pd( b.value, b.value, 1 );
   y = _mm_mul_pd( x, y );
   return _mm_addsub_pd( z, y );
}
#else
= delete;
#endif
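// A concrete check of the SSE3 branch above: one 128-bit register holds a single
// complex<double> stored as ( real, imaginary ). The standalone snippet below (plain SSE3
// intrinsics, not the Blaze API) applies the same shuffle/multiply/addsub sequence to
// (1+2i)*(3+4i) and prints -5 + 10i:
//
//    #include <pmmintrin.h>   // SSE3 (_mm_addsub_pd)
//    #include <cstdio>
//
//    int main()
//    {
//       const __m128d a = _mm_set_pd( 2.0, 1.0 );   // 1 + 2i, stored as ( 1.0, 2.0 )
//       const __m128d b = _mm_set_pd( 4.0, 3.0 );   // 3 + 4i, stored as ( 3.0, 4.0 )
//
//       __m128d x = _mm_shuffle_pd( a, a, 0 );      // ( a.re, a.re ) = ( 1, 1 )
//       __m128d z = _mm_mul_pd( x, b );             // ( a.re*b.re, a.re*b.im ) = ( 3, 4 )
//       x = _mm_shuffle_pd( a, a, 3 );              // ( a.im, a.im ) = ( 2, 2 )
//       __m128d y = _mm_shuffle_pd( b, b, 1 );      // ( b.im, b.re ) = ( 4, 3 )
//       y = _mm_mul_pd( x, y );                     // ( a.im*b.im, a.im*b.re ) = ( 8, 6 )
//       const __m128d c = _mm_addsub_pd( z, y );    // ( 3-8, 4+6 ) = ( -5, 10 )
//
//       double out[2];
//       _mm_storeu_pd( out, c );
//       std::printf( "%g + %gi\n", out[0], out[1] );
//    }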
Cross references:

Included headers:
- Header file for the basic SIMD types.
- System settings for the inline keywords.
- System settings for the SSE mode.

SIMD data types used above:
- SIMDfloat / SIMDdouble: SIMD types for 32-bit single precision and 64-bit double precision floating point data values.
- SIMDcfloat / SIMDcdouble: SIMD types for 32-bit single precision and 64-bit double precision complex values.
- SIMDint16 / SIMDint32: SIMD types for 16-bit and 32-bit signed integral data values.
- SIMDint64: SIMD type for 64-bit integral data values.
- SIMDuint16 / SIMDuint32 / SIMDuint64: SIMD types for 16-bit, 32-bit, and 64-bit unsigned integral data values.
- SIMDcint16 / SIMDcint32 / SIMDcint64: SIMD types for 16-bit, 32-bit, and 64-bit signed integral complex values.
- SIMDcuint16 / SIMDcuint32 / SIMDcuint64: SIMD types for 16-bit, 32-bit, and 64-bit unsigned integral complex values.

Further entities:
- BLAZE_ALWAYS_INLINE: platform dependent setup of an enforced inline keyword (defined in Inline.h).
- BLAZE_ALWAYS_INLINE const SIMDcdouble operator*( const SIMDcdouble& a, const SIMDcdouble& b ) noexcept = delete:
  fallback declaration for the multiplication of two vectors of double precision complex SIMD values when no
  suitable SIMD mode is available.
- SIMDf32MultExpr: expression object for 32-bit floating point multiplication operations; stores the left-hand
  side operand a_ and the right-hand side operand b_, derives from the base type SIMDf32<This>, is constructed
  via SIMDf32MultExpr( const T1& a, const T2& b ), and evaluates the product on demand through
  BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept.
- SIMDf64MultExpr: expression object for 64-bit floating point multiplication operations; stores the left-hand
  side operand a_ and the right-hand side operand b_, derives from the base type SIMDf64<This>, is constructed
  via SIMDf64MultExpr( const T1& a, const T2& b ), and evaluates the product on demand through
  BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept.