35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
66 const simd_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
67 const simd_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
68 const simd_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
69 const __m128i e = _mm_add_epi16( _mm256_extracti128_si256( d.value, 1 )
70 , _mm256_castsi256_si128( d.value ) );
71 return _mm_extract_epi16( e, 0 );
72 #elif BLAZE_SSSE3_MODE
73 const simd_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
74 const simd_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
75 const simd_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
78 return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
96 return _mm512_reduce_add_epi32( a.value );
98 const simd_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
99 const simd_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
100 const __m128i d = _mm_add_epi32( _mm256_extracti128_si256( c.value, 1 )
101 , _mm256_castsi256_si128( c.value ) );
102 return _mm_extract_epi32( d, 0 );
103 #elif BLAZE_SSSE3_MODE
104 const simd_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
105 const simd_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
107 #elif BLAZE_SSE2_MODE
108 return a[0] + a[1] + a[2] + a[3];
126 return _mm512_reduce_add_epi64( a.value );
127 #elif BLAZE_AVX2_MODE
128 return a[0] + a[1] + a[2] + a[3];
129 #elif BLAZE_SSE2_MODE
148 return _mm512_reduce_add_ps( a.value );
150 const simd_float_t b( _mm256_hadd_ps( a.value, a.value ) );
151 const simd_float_t c( _mm256_hadd_ps( b.value, b.value ) );
152 const __m128 d = _mm_add_ps( _mm256_extractf128_ps( c.value, 1 )
153 , _mm256_castps256_ps128( c.value ) );
154 return *reinterpret_cast<const float*>( &d );
155 #elif BLAZE_SSE3_MODE
156 const simd_float_t b( _mm_hadd_ps( a.value, a.value ) );
157 const simd_float_t c( _mm_hadd_ps( b.value, b.value ) );
160 return a[0] + a[1] + a[2] + a[3];
178 return _mm512_reduce_add_pd( a.value );
181 const __m128d c = _mm_add_pd( _mm256_extractf128_pd( b.value, 1 )
182 , _mm256_castpd256_pd128( b.value ) );
183 return *reinterpret_cast<const double*>( &c );
184 #elif BLAZE_SSE3_MODE
187 #elif BLAZE_SSE2_MODE
206 return complex<int8_t>( a[0] + a[1] + a[ 2] + a[ 3] + a[ 4] + a[ 5] + a[ 6] + a[ 7] +
207 a[8] + a[9] + a[10] + a[11] + a[12] + a[13] + a[14] + a[15] );
208 #elif BLAZE_SSE2_MODE
209 return complex<int8_t>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
227 return complex<int16_t>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
228 #elif BLAZE_SSE2_MODE
229 return complex<int16_t>( a[0] + a[1] + a[2] + a[3] );
247 return complex<int32_t>( a[0] + a[1] + a[2] + a[3] );
248 #elif BLAZE_SSE2_MODE
249 return complex<int32_t>( a[0] + a[1] );
267 return complex<int64_t>( a[0] + a[1] );
268 #elif BLAZE_SSE2_MODE
287 return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
289 return complex<float>( a[0] + a[1] + a[2] + a[3] );
291 return complex<float>( a[0] + a[1] );
309 return complex<double>( a[0] + a[1] + a[2] + a[3] );
311 return complex<double>( a[0] + a[1] );
312 #elif BLAZE_SSE2_MODE
Intrinsic type for 16-bit integral data values.
16-bit signed integer type of the Blaze library.
Intrinsic type for 64-bit double precision floating point data values.
Intrinsic type for 8-bit integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the basic intrinsic types.
Intrinsic type for 64-bit integral complex values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit integral data values.
Intrinsic type for 32-bit integral complex values.
Intrinsic type for 64-bit double precision complex values.
BLAZE_ALWAYS_INLINE int16_t sum(const simd_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Intrinsic type for 32-bit single precision complex values.
Intrinsic type for 16-bit integral complex values.
System settings for the SSE mode.
64-bit signed integer type of the Blaze library.
System settings for the inline keywords.
32-bit signed integer type of the Blaze library.
Intrinsic type for 32-bit integral data values.