// Include guard of the SIMD sum header, followed by the first horizontal-sum fragment.
// NOTE(review): the function signature, the braces and the preprocessor lines separating the
// three return statements are not present in this listing -- confirm against the original header.
35 #ifndef _BLAZE_MATH_SIMD_SUM_H_ 36 #define _BLAZE_MATH_SIMD_SUM_H_ 64 template<
typename T >
// AVX-512BW branch: adds elements 0..63 of (~a), each read through the subscript operator.
67 #if BLAZE_AVX512BW_MODE 68 return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
69 (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
70 (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
71 (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31] +
72 (~a)[32] + (~a)[33] + (~a)[34] + (~a)[35] + (~a)[36] + (~a)[37] + (~a)[38] + (~a)[39] +
73 (~a)[40] + (~a)[41] + (~a)[42] + (~a)[43] + (~a)[44] + (~a)[45] + (~a)[46] + (~a)[47] +
74 (~a)[48] + (~a)[49] + (~a)[50] + (~a)[51] + (~a)[52] + (~a)[53] + (~a)[54] + (~a)[55] +
75 (~a)[56] + (~a)[57] + (~a)[58] + (~a)[59] + (~a)[60] + (~a)[61] + (~a)[62] + (~a)[63];
// Next branch (its guard line is not visible here): adds elements 0..31.
// NOTE(review): presumably the AVX2 case, judging by the halved element count -- verify.
77 return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
78 (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
79 (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
80 (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31];
// Last visible branch (its guard line is not visible here): adds elements 0..15.
// NOTE(review): presumably the SSE2 case -- verify.
82 return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
83 (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15];
// Horizontal-sum fragment whose result is wrapped in complex<int8_t>.
// The number of elements added depends on the active instruction set:
// 32 under AVX-512BW, 16 under AVX2 and 8 under SSE2.
// NOTE(review): the function signature, braces and any trailing #else/#endif are not present in
// this listing -- confirm against the original header.
98 template<
typename T >
101 #if BLAZE_AVX512BW_MODE 102 return complex<int8_t>( (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
103 (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
104 (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
105 (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31] );
106 #elif BLAZE_AVX2_MODE 107 return complex<int8_t>( (~a)[0] + (~a)[1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
108 (~a)[8] + (~a)[9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] );
109 #elif BLAZE_SSE2_MODE 110 return complex<int8_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
// Horizontal-sum fragment operating on 16-bit integer lanes of (~a).value.
// NOTE(review): the function signature, braces and any trailing #else/#endif are not present in
// this listing -- confirm against the original header.
133 template<
typename T >
// AVX-512BW: split the 512-bit register into its lower and upper 256-bit halves.
136 #if BLAZE_AVX512BW_MODE 137 const __m256i low ( _mm512_castsi512_si256( (~a).value ) );
138 const __m256i high( _mm512_extracti64x4_epi64( (~a).value, 1 ) );
// Combine both halves with a first horizontal add, then keep folding the result onto itself.
139 const __m256i b ( _mm256_hadd_epi16( low, high ) );
140 const __m256i c ( _mm256_hadd_epi16( b, b ) );
141 const __m256i d ( _mm256_hadd_epi16( c, c ) );
142 const __m256i e ( _mm256_hadd_epi16( d, d ) );
// The 256-bit hadd works per 128-bit lane, so the two lanes are added explicitly; element 0 is returned.
143 const __m128i f ( _mm_add_epi16( _mm256_extracti128_si256( e, 1 )
144 , _mm256_castsi256_si128( e ) ) );
145 return _mm_extract_epi16( f, 0 );
// AVX2: three horizontal adds, then add the upper 128-bit lane to the lower one and return element 0.
146 #elif BLAZE_AVX2_MODE 147 const __m256i b( _mm256_hadd_epi16( (~a).value, (~a).value ) );
148 const __m256i c( _mm256_hadd_epi16( b, b ) );
149 const __m256i d( _mm256_hadd_epi16( c, c ) );
150 const __m128i e( _mm_add_epi16( _mm256_extracti128_si256( d, 1 )
151 , _mm256_castsi256_si128( d ) ) );
152 return _mm_extract_epi16( e, 0 );
// SSSE3: three horizontal adds on the 128-bit register, then return element 0.
153 #elif BLAZE_SSSE3_MODE 154 const __m128i b( _mm_hadd_epi16( (~a).value, (~a).value ) );
155 const __m128i c( _mm_hadd_epi16( b, b ) );
156 const __m128i d( _mm_hadd_epi16( c, c ) );
157 return _mm_extract_epi16( d, 0 );
// SSE2 (no hadd intrinsic used here): add the eight elements one by one via the subscript operator.
158 #elif BLAZE_SSE2_MODE 159 return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7];
// Horizontal-sum fragment whose result is wrapped in complex<int16_t>.
// Elements added per instruction set: 16 under AVX-512BW, 8 under AVX2, 4 under SSE2.
// NOTE(review): the function signature, braces and any trailing #else/#endif are not present in
// this listing -- confirm against the original header.
174 template<
typename T >
177 #if BLAZE_AVX512BW_MODE 178 return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
179 (~a)[8] + (~a)[9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] );
180 #elif BLAZE_AVX2_MODE 181 return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
182 #elif BLAZE_SSE2_MODE 183 return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
// Horizontal-sum fragment operating on 32-bit integer lanes of (~a).value.
// NOTE(review): the function signature, braces and any trailing #else/#endif are not present in
// this listing -- confirm against the original header.
206 template<
typename T >
// AVX-512F: split the 512-bit register into its lower and upper 256-bit halves.
209 #if BLAZE_AVX512F_MODE 210 const __m256i low ( _mm512_castsi512_si256( (~a).value ) );
211 const __m256i high( _mm512_extracti64x4_epi64( (~a).value, 1 ) );
// Combine both halves with a first horizontal add, then keep folding the result onto itself.
212 const __m256i b ( _mm256_hadd_epi32( low, high ) );
213 const __m256i c ( _mm256_hadd_epi32( b, b ) );
214 const __m256i d ( _mm256_hadd_epi32( c, c ) );
// Add the upper 128-bit lane to the lower one and return element 0.
215 const __m128i e ( _mm_add_epi32( _mm256_extracti128_si256( d, 1 )
216 , _mm256_castsi256_si128( d ) ) );
217 return _mm_extract_epi32( e, 0 );
// NOTE(review): the guard line for the following return is missing from this listing; as shown it
// would directly follow the return above. Presumably a separate #elif branch -- verify.
219 return _mm512_reduce_add_epi32( (~a).value );
// AVX2: two horizontal adds, then add the upper 128-bit lane to the lower one and return element 0.
220 #elif BLAZE_AVX2_MODE 221 const __m256i b( _mm256_hadd_epi32( (~a).value, (~a).value ) );
222 const __m256i c( _mm256_hadd_epi32( b, b ) );
223 const __m128i d( _mm_add_epi32( _mm256_extracti128_si256( c, 1 )
224 , _mm256_castsi256_si128( c ) ) );
225 return _mm_extract_epi32( d, 0 );
// SSSE3: two horizontal adds; the lowest 32-bit element of the result is returned.
226 #elif BLAZE_SSSE3_MODE 227 const __m128i b( _mm_hadd_epi32( (~a).value, (~a).value ) );
228 return _mm_cvtsi128_si32( _mm_hadd_epi32( b, b ) );
// SSE2: shuffle mask 0x4E swaps the two 64-bit halves, mask 0xB1 swaps neighbouring 32-bit
// elements; adding after each shuffle leaves the total in the lowest element.
229 #elif BLAZE_SSE2_MODE 230 const __m128i b( _mm_add_epi32( (~a).value, _mm_shuffle_epi32( (~a).value, 0x4E ) ) );
231 return _mm_cvtsi128_si32( _mm_add_epi32( b, _mm_shuffle_epi32( b, 0xB1 ) ) );
// Horizontal-sum fragment whose result is wrapped in complex<int32_t>.
// Elements added per instruction set: 8 under AVX-512F or MIC, 4 under AVX2, 2 under SSE2.
// NOTE(review): the function signature, braces and any trailing #else/#endif are not present in
// this listing -- confirm against the original header.
246 template<
typename T >
249 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 250 return complex<int32_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
251 #elif BLAZE_AVX2_MODE 252 return complex<int32_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
253 #elif BLAZE_SSE2_MODE 254 return complex<int32_t>( (~a)[0] + (~a)[1] );
// Horizontal-sum fragment that adds the elements of (~a) one by one via the subscript operator.
// Elements added per instruction set: 8 under AVX-512F or MIC, 4 under AVX2, 2 under SSE2.
// NOTE(review): the element counts suggest 64-bit lanes, but the function signature, braces and
// any trailing #else/#endif are not present in this listing -- confirm against the original header.
277 template<
typename T >
280 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 281 return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7];
282 #elif BLAZE_AVX2_MODE 283 return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3];
284 #elif BLAZE_SSE2_MODE 285 return (~a)[0] + (~a)[1];
300 template<
typename T >
303 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 304 return complex<int64_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
305 #elif BLAZE_AVX2_MODE 306 return complex<int64_t>( (~a)[0] + (~a)[1] );
307 #elif BLAZE_SSE2_MODE 333 #if BLAZE_AVX512F_MODE 334 __m512 b( _mm512_shuffle_f32x4( a.value, a.value, 0b11
'10'11
'10 ) ); 335 const __m512 c( _mm512_add_ps( b, a.value ) ); 336 const __m512 d( _mm512_shuffle_f32x4( c, c, 0b01'01
'01'01 ) );
337 const __m512 e( _mm512_add_ps( d, c ) );
338 const __m512 f( _mm512_castsi512_ps( _mm512_shuffle_epi32( _mm512_castps_si512( e ), _MM_PERM_BADC ) ) );
339 const __m512 g( _mm512_add_ps( e, f ) );
340 const __m512 h( _mm512_castsi512_ps( _mm512_shuffle_epi32( _mm512_castps_si512( g ), _MM_PERM_CDAB ) ) );
341 b = _mm512_add_ps( g, h );
342 return _mm_cvtss_f32( _mm512_castps512_ps128( b ) );
344 return _mm512_reduce_add_ps( a.value );
346 const __m256 b( _mm256_hadd_ps( a.value, a.value ) );
347 const __m256 c( _mm256_hadd_ps( b, b ) );
348 const __m128 d( _mm_add_ps( _mm256_extractf128_ps( c, 1 ), _mm256_castps256_ps128( c ) ) );
349 return _mm_cvtss_f32( d );
350 #elif BLAZE_SSE3_MODE 351 const __m128 b( _mm_hadd_ps( a.value, a.value ) );
352 const __m128 c( _mm_hadd_ps( b, b ) );
353 return _mm_cvtss_f32( c );
355 const __m128 b( _mm_add_ps( a.value, _mm_movehl_ps( a.value, a.value ) ) );
356 return _mm_cvtss_f32( _mm_add_ss( b, _mm_shuffle_ps( b, b, 1 ) ) );
373 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 374 return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
376 return complex<float>( a[0] + a[1] + a[2] + a[3] );
378 return complex<float>( a[0] + a[1] );
403 #if BLAZE_AVX512F_MODE 404 __m512d b( _mm512_shuffle_f64x2( a.value, a.value, 0b11
'10'11
'10 ) ); 405 const __m512d c( _mm512_add_pd( a.value, b ) ); 406 const __m512d d( _mm512_permutex_pd( c, 0b01'00
'11'10 ) );
407 const __m512d e( _mm512_add_pd( c , d ) );
408 const __m512d f( _mm512_permutex_pd( e, 0b10
'11'00
'01 ) ); 409 b = _mm512_add_pd( e, f ); 410 return _mm_cvtsd_f64( _mm512_castpd512_pd128( b ) ); 412 return _mm512_reduce_add_pd( a.value ); 414 const __m256d b( _mm256_hadd_pd( a.value, a.value ) ); 415 const __m128d c( _mm_add_pd( _mm256_extractf128_pd( b, 1 ), _mm256_castpd256_pd128( b ) ) ); 416 return _mm_cvtsd_f64( c ); 417 #elif BLAZE_SSE3_MODE 418 return _mm_cvtsd_f64( _mm_hadd_pd( a.value, a.value ) ); 419 #elif BLAZE_SSE2_MODE 420 return _mm_cvtsd_f64( _mm_add_sd( a.value, _mm_unpackhi_pd( a.value, a.value ) ) ); 425 //************************************************************************************************* 428 //************************************************************************************************* 435 BLAZE_ALWAYS_INLINE const complex<double> sum( const SIMDcdouble& a ) noexcept 437 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 438 return complex<double>( a[0] + a[1] + a[2] + a[3] ); 440 return complex<double>( a[0] + a[1] ); 441 #elif BLAZE_SSE2_MODE 447 //************************************************************************************************* Header file for auxiliary alias declarations.
typename T::ValueType ValueType_t
Alias declaration for nested ValueType type definitions. The ValueType_t alias declaration provides a ...
Definition: Aliases.h:490
SIMD type for 64-bit double precision floating point data values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2147
Header file for the basic SIMD types.
SIMD type for 32-bit single precision complex values.
SIMD type for 32-bit single precision floating point data values.
System settings for the SSE mode.
System settings for the inline keywords.