35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
66 const sse_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
67 const sse_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
68 const sse_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
69 const __m128i e = _mm_add_epi16( _mm256_extracti128_si256( d.value, 1 )
70 , _mm256_castsi256_si128( d.value ) );
71 return _mm_extract_epi16( e, 0 );
72 #elif BLAZE_SSSE3_MODE
73 const sse_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
74 const sse_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
75 const sse_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
78 return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
96 return _mm512_reduce_add_epi32( a.value );
98 const sse_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
99 const sse_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
100 const __m128i d = _mm_add_epi32( _mm256_extracti128_si256( c.value, 1 )
101 , _mm256_castsi256_si128( c.value ) );
102 return _mm_extract_epi32( d, 0 );
103 #elif BLAZE_SSSE3_MODE
104 const sse_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
105 const sse_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
107 #elif BLAZE_SSE2_MODE
108 return a[0] + a[1] + a[2] + a[3];
126 return _mm512_reduce_add_epi64( a.value );
127 #elif BLAZE_AVX2_MODE
128 return a[0] + a[1] + a[2] + a[3];
129 #elif BLAZE_SSE2_MODE
148 return _mm512_reduce_add_ps( a.value );
150 const sse_float_t b( _mm256_hadd_ps( a.value, a.value ) );
151 const sse_float_t c( _mm256_hadd_ps( b.value, b.value ) );
152 const __m128 d = _mm_add_ps( _mm256_extractf128_ps( c.value, 1 )
153 , _mm256_castps256_ps128( c.value ) );
154 return *reinterpret_cast<const float*>( &d );
155 #elif BLAZE_SSE3_MODE
156 const sse_float_t b( _mm_hadd_ps( a.value, a.value ) );
157 const sse_float_t c( _mm_hadd_ps( b.value, b.value ) );
160 return a[0] + a[1] + a[2] + a[3];
178 return _mm512_reduce_add_pd( a.value );
180 const sse_double_t b( _mm256_hadd_pd( a.value, a.value ) );
181 const __m128d c = _mm_add_pd( _mm256_extractf128_pd( b.value, 1 )
182 , _mm256_castpd256_pd128( b.value ) );
183 return *reinterpret_cast<const double*>( &c );
184 #elif BLAZE_SSE3_MODE
185 const sse_double_t b( _mm_hadd_pd( a.value, a.value ) );
187 #elif BLAZE_SSE2_MODE
206 return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
208 return complex<float>( a[0] + a[1] + a[2] + a[3] );
210 return complex<float>( a[0] + a[1] );
228 return complex<double>( a[0] + a[1] + a[2] + a[3] );
230 return complex<double>( a[0] + a[1] );
231 #elif BLAZE_SSE2_MODE
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Intrinsic type for 32-bit single precision complex values.
16-bit signed integer type of the Blaze library.
Intrinsic type for 64-bit integral data values.
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Intrinsic type for 16-bit integral data values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit double precision complex values.
Intrinsic type for 64-bit double precision floating point data values.
Header file for the basic intrinsic types.
System settings for the SSE mode.
64-bit signed integer type of the Blaze library.
Intrinsic type for 32-bit integral data values.
System settings for the inline keywords.
32-bit signed integer type of the Blaze library.