22 #ifndef _BLAZE_MATH_INTRINSICS_MULTIPLICATION_H_
23 #define _BLAZE_MATH_INTRINSICS_MULTIPLICATION_H_
52 inline sse_int16_t
operator*(
const sse_int16_t& a,
const sse_int16_t& b )
54 return _mm256_mullo_epi16( a.value, b.value );
57 inline sse_int16_t
operator*(
const sse_int16_t& a,
const sse_int16_t& b )
59 return _mm_mullo_epi16( a.value, b.value );
75 inline sse_int32_t
operator*(
const sse_int32_t& a,
const sse_int32_t& b )
77 return _mm512_mullo_epi32( a.value, b.value );
80 inline sse_int32_t
operator*(
const sse_int32_t& a,
const sse_int32_t& b )
82 return _mm256_mullo_epi32( a.value, b.value );
85 inline sse_int32_t
operator*(
const sse_int32_t& a,
const sse_int32_t& b )
87 return _mm_mullo_epi32( a.value, b.value );
103 inline sse_float_t
operator*(
const sse_float_t& a,
const sse_float_t& b )
105 return _mm512_mul_ps( a.value, b.value );
108 inline sse_float_t
operator*(
const sse_float_t& a,
const sse_float_t& b )
110 return _mm256_mul_ps( a.value, b.value );
113 inline sse_float_t
operator*(
const sse_float_t& a,
const sse_float_t& b )
115 return _mm_mul_ps( a.value, b.value );
131 inline sse_double_t
operator*(
const sse_double_t& a,
const sse_double_t& b )
133 return _mm512_mul_pd( a.value, b.value );
136 inline sse_double_t
operator*(
const sse_double_t& a,
const sse_double_t& b )
138 return _mm256_mul_pd( a.value, b.value );
140 #elif BLAZE_SSE2_MODE
141 inline sse_double_t
operator*(
const sse_double_t& a,
const sse_double_t& b )
143 return _mm_mul_pd( a.value, b.value );
159 inline sse_cfloat_t
operator*(
const sse_cfloat_t& a,
const sse_cfloat_t& b )
163 x = _mm256_shuffle_ps( a.value, a.value, 0xA0A0 );
164 z = _mm256_mul_ps( x, b.value );
165 x = _mm256_shuffle_ps( a.value, a.value, 0xF5F5 );
166 y = _mm256_shuffle_ps( b.value, b.value, 0xB1B1 );
167 y = _mm256_mul_ps( x, y );
168 return _mm256_addsub_ps( z, y );
170 #elif BLAZE_SSE3_MODE
171 inline sse_cfloat_t
operator*(
const sse_cfloat_t& a,
const sse_cfloat_t& b )
175 x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
176 z = _mm_mul_ps( x, b.value );
177 x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
178 y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
179 y = _mm_mul_ps( x, y );
180 return _mm_addsub_ps( z, y );
196 inline sse_cdouble_t
operator*(
const sse_cdouble_t& a,
const sse_cdouble_t& b )
200 x = _mm256_shuffle_pd( a.value, a.value, 0 );
201 z = _mm256_mul_pd( x, b.value );
202 x = _mm256_shuffle_pd( a.value, a.value, 15 );
203 y = _mm256_shuffle_pd( b.value, b.value, 5 );
204 y = _mm256_mul_pd( x, y );
205 return _mm256_addsub_pd( z, y );
207 #elif BLAZE_SSE3_MODE
208 inline sse_cdouble_t
operator*(
const sse_cdouble_t& a,
const sse_cdouble_t& b )
212 x = _mm_shuffle_pd( a.value, a.value, 0 );
213 z = _mm_mul_pd( x, b.value );
214 x = _mm_shuffle_pd( a.value, a.value, 3 );
215 y = _mm_shuffle_pd( b.value, b.value, 1 );
216 y = _mm_mul_pd( x, y );
217 return _mm_addsub_pd( z, y );