35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
// Fragments of the 16-bit integer reduction (the listing's cross-reference names it
// `int16_t sum(const sse_int16_t &a)`). The signature, the opening #if guard and the
// return statements of the first two branches are not part of this excerpt.
//
// First visible branch: four chained horizontal adds, each using its input for both operands.
// NOTE(review): per Intel's documentation _mm256_hadd_epi16 adds adjacent pairs inside each
// 128-bit lane independently, so these steps never combine the lower and upper halves.
// With eight 16-bit elements per lane, three steps already leave the lane total in every
// element of that lane, and the fourth step doubles it. The return statement is not visible
// here -- confirm that the value returned really is the sum over all 16 elements.
65 const sse_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
66 const sse_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
67 const sse_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
68 const sse_int16_t e( _mm256_hadd_epi16( d.value, d.value ) );
// SSSE3 branch: three horizontal adds on a 128-bit vector of eight 16-bit elements leave the
// total of all eight inputs in every element of d (return statement not visible).
70 #elif BLAZE_SSSE3_MODE
71 const sse_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
72 const sse_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
73 const sse_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
// Remaining branch (its guard line is not visible): plain element-wise sum of the eight
// elements obtained through operator[].
76 return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
// Fragments of a reduction over sse_int32_t values (presumably the 32-bit overload of sum();
// the signature and the opening #if guard are not part of this excerpt).
//
// First visible branch: delegates the whole reduction to _mm512_reduce_add_epi32.
94 return _mm512_reduce_add_epi32( a.value );
// Next branch (guard not visible): three chained 256-bit horizontal adds.
// NOTE(review): _mm256_hadd_epi32 operates on each 128-bit lane separately. With four 32-bit
// elements per lane, two steps already produce the lane total in every element, the third
// step doubles it, and the two lanes are never added together. The return statement is not
// visible -- confirm it yields the sum over all eight elements.
96 const sse_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
97 const sse_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
98 const sse_int32_t d( _mm256_hadd_epi32( c.value, c.value ) );
// SSSE3 branch: two horizontal adds on a 128-bit vector of four 32-bit elements leave the
// total in every element of c (return statement not visible).
100 #elif BLAZE_SSSE3_MODE
101 const sse_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
102 const sse_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
// SSE2 branch: no horizontal-add instruction is used; the four elements are summed one by one.
104 #elif BLAZE_SSE2_MODE
105 return a[0] + a[1] + a[2] + a[3];
// Fragments of a reduction over sse_float_t values (presumably the single-precision overload
// of sum(); the signature and the opening #if guard are not part of this excerpt).
//
// First visible branch: delegates the whole reduction to _mm512_reduce_add_ps.
123 return _mm512_reduce_add_ps( a.value );
// Next branch (guard not visible): three chained 256-bit horizontal adds.
// NOTE(review): _mm256_hadd_ps works within each 128-bit lane. With four floats per lane,
// two steps already give the lane total in every element, the third step doubles it, and
// the lower and upper lanes are never combined. The return statement is not visible --
// confirm it yields the sum over all eight elements.
125 const sse_float_t b( _mm256_hadd_ps( a.value, a.value ) );
126 const sse_float_t c( _mm256_hadd_ps( b.value, b.value ) );
127 const sse_float_t d( _mm256_hadd_ps( c.value, c.value ) );
// SSE3 branch: two horizontal adds on a 128-bit vector of four floats leave the total in
// every element of c (return statement not visible).
129 #elif BLAZE_SSE3_MODE
130 const sse_float_t b( _mm_hadd_ps( a.value, a.value ) );
131 const sse_float_t c( _mm_hadd_ps( b.value, b.value ) );
// Remaining branch (its guard line is not visible): element-wise sum of four elements.
134 return a[0] + a[1] + a[2] + a[3];
// Fragments of a reduction over sse_double_t values (presumably the double-precision overload
// of sum(); the signature and the opening #if guard are not part of this excerpt).
//
// First visible branch: delegates the whole reduction to _mm512_reduce_add_pd.
152 return _mm512_reduce_add_pd( a.value );
// Next branch (guard not visible): two chained 256-bit horizontal adds.
// NOTE(review): _mm256_hadd_pd works within each 128-bit lane. With two doubles per lane,
// the first step already gives the lane total in both elements, the second step doubles it,
// and the two lanes are never combined. The return statement is not visible -- confirm it
// yields the sum over all four elements.
154 const sse_double_t b( _mm256_hadd_pd( a.value, a.value ) );
155 const sse_double_t c( _mm256_hadd_pd( b.value, b.value ) );
// SSE3 branch: a single horizontal add of the two doubles leaves their sum in both elements
// of b (return statement not visible).
157 #elif BLAZE_SSE3_MODE
158 const sse_double_t b( _mm_hadd_pd( a.value, a.value ) );
// SSE2 branch: its body is not part of this excerpt.
160 #elif BLAZE_SSE2_MODE
// Fragments of a reduction returning complex<float> (signature and preprocessor guards are
// not part of this excerpt). Each line is the return statement of a different branch: the
// elements obtained through operator[] are added up and the result is passed to the
// complex<float> constructor.
// NOTE(review): the element counts 8, 4 and 2 presumably match the vector width of the
// respective SIMD mode -- verify against the guards in the full header.
179 return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
181 return complex<float>( a[0] + a[1] + a[2] + a[3] );
183 return complex<float>( a[0] + a[1] );
// Fragments of a reduction returning complex<double> (signature and the earlier preprocessor
// guards are not part of this excerpt). Each return adds up the elements obtained through
// operator[] and passes the result to the complex<double> constructor.
// NOTE(review): the element counts 4 and 2 presumably match the vector width of the
// respective SIMD mode -- verify against the guards in the full header.
201 return complex<double>( a[0] + a[1] + a[2] + a[3] );
203 return complex<double>( a[0] + a[1] );
// SSE2 branch: its body is not part of this excerpt.
204 #elif BLAZE_SSE2_MODE
Intrinsic type for 32-bit single precision complex values.
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
16-bit signed integer type of the Blaze library.
Intrinsic type for 16-bit integral data values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit double precision complex values.
Intrinsic type for 64-bit double precision floating point data values.
Header file for the basic intrinsic types.
System settings for the SSE mode.
Intrinsic type for 32-bit integral data values.
32-bit signed integer type of the Blaze library.