35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
65 const sse_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
66 const sse_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
67 const sse_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
68 const sse_int16_t e( _mm256_hadd_epi16( d.value, d.value ) );
70 #elif BLAZE_SSSE3_MODE
71 const sse_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
72 const sse_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
73 const sse_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
76 return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
94 return _mm512_reduce_add_epi32( a.value );
96 const sse_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
97 const sse_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
98 const sse_int32_t d( _mm256_hadd_epi32( c.value, c.value ) );
100 #elif BLAZE_SSSE3_MODE
101 const sse_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
102 const sse_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
104 #elif BLAZE_SSE2_MODE
105 return a[0] + a[1] + a[2] + a[3];
123 return _mm512_reduce_add_epi64( a.value );
124 #elif BLAZE_AVX2_MODE
125 return a[0] + a[1] + a[2] + a[3];
126 #elif BLAZE_SSE2_MODE
145 return _mm512_reduce_add_ps( a.value );
147 const sse_float_t b( _mm256_hadd_ps( a.value, a.value ) );
148 const sse_float_t c( _mm256_hadd_ps( b.value, b.value ) );
149 const __m128 d = _mm_add_ps( _mm256_extractf128_ps( c.value, 1 )
150 , _mm256_castps256_ps128( c.value ) );
151 return *reinterpret_cast<const float*>( &d );
152 #elif BLAZE_SSE3_MODE
153 const sse_float_t b( _mm_hadd_ps( a.value, a.value ) );
154 const sse_float_t c( _mm_hadd_ps( b.value, b.value ) );
157 return a[0] + a[1] + a[2] + a[3];
175 return _mm512_reduce_add_pd( a.value );
177 const sse_double_t b( _mm256_hadd_pd( a.value, a.value ) );
178 const __m128d c = _mm_add_pd( _mm256_extractf128_pd( b.value, 1 )
179 , _mm256_castpd256_pd128( b.value ) );
180 return *reinterpret_cast<const double*>( &c );
181 #elif BLAZE_SSE3_MODE
182 const sse_double_t b( _mm_hadd_pd( a.value, a.value ) );
184 #elif BLAZE_SSE2_MODE
203 return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
205 return complex<float>( a[0] + a[1] + a[2] + a[3] );
207 return complex<float>( a[0] + a[1] );
225 return complex<double>( a[0] + a[1] + a[2] + a[3] );
227 return complex<double>( a[0] + a[1] );
228 #elif BLAZE_SSE2_MODE
Intrinsic type for 32-bit single precision complex values.
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
16-bit signed integer type of the Blaze library.
Intrinsic type for 64-bit integral data values.
Intrinsic type for 16-bit integral data values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit double precision complex values.
Intrinsic type for 64-bit double precision floating point data values.
Header file for the basic intrinsic types.
System settings for the SSE mode.
64-bit signed integer type of the Blaze library.
Intrinsic type for 32-bit integral data values.
32-bit signed integer type of the Blaze library.