Reduction.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_REDUCTION_H_
36 #define _BLAZE_MATH_SIMD_REDUCTION_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
45 #include <blaze/system/Inline.h>
47 
48 
49 namespace blaze {
50 
51 //=================================================================================================
52 //
53 // 8-BIT INTEGRAL SIMD TYPES
54 //
55 //=================================================================================================
56 
57 //*************************************************************************************************
64 template< typename T > // Type of the SIMD element
65 BLAZE_ALWAYS_INLINE ValueType_<T> sum( const SIMDi8<T>& a ) noexcept
66 {
67 #if BLAZE_AVX512BW_MODE
68  return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
69  (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
70  (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
71  (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31] +
72  (~a)[32] + (~a)[33] + (~a)[34] + (~a)[35] + (~a)[36] + (~a)[37] + (~a)[38] + (~a)[39] +
73  (~a)[40] + (~a)[41] + (~a)[42] + (~a)[43] + (~a)[44] + (~a)[45] + (~a)[46] + (~a)[47] +
74  (~a)[48] + (~a)[49] + (~a)[50] + (~a)[51] + (~a)[52] + (~a)[53] + (~a)[54] + (~a)[55] +
75  (~a)[56] + (~a)[57] + (~a)[58] + (~a)[59] + (~a)[60] + (~a)[61] + (~a)[62] + (~a)[63];
76 #elif BLAZE_AVX2_MODE
77  return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
78  (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
79  (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
80  (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31];
81 #elif BLAZE_SSE2_MODE
82  return (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
83  (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15];
84 #else
85  return (~a).value;
86 #endif
87 }
88 //*************************************************************************************************
89 
90 
91 //*************************************************************************************************
98 template< typename T > // Type of the SIMD element
99 BLAZE_ALWAYS_INLINE const ValueType_<T> sum( const SIMDci8<T>& a ) noexcept
100 {
101 #if BLAZE_AVX512BW_MODE
102  return complex<int8_t>( (~a)[ 0] + (~a)[ 1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
103  (~a)[ 8] + (~a)[ 9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] +
104  (~a)[16] + (~a)[17] + (~a)[18] + (~a)[19] + (~a)[20] + (~a)[21] + (~a)[22] + (~a)[23] +
105  (~a)[24] + (~a)[25] + (~a)[26] + (~a)[27] + (~a)[28] + (~a)[29] + (~a)[30] + (~a)[31] );
106 #elif BLAZE_AVX2_MODE
107  return complex<int8_t>( (~a)[0] + (~a)[1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
108  (~a)[8] + (~a)[9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] );
109 #elif BLAZE_SSE2_MODE
110  return complex<int8_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
111 #else
112  return (~a).value;
113 #endif
114 }
115 //*************************************************************************************************
116 
117 
118 
119 
120 //=================================================================================================
121 //
122 // 16-BIT INTEGRAL SIMD TYPES
123 //
124 //=================================================================================================
125 
126 //*************************************************************************************************
133 template< typename T > // Type of the SIMD element
134 BLAZE_ALWAYS_INLINE ValueType_<T> sum( const SIMDi16<T>& a ) noexcept
135 {
136 #if BLAZE_AVX512BW_MODE
137  const __m256i low ( _mm512_castsi512_si256( (~a).value ) );
138  const __m256i high( _mm512_extracti64x4_epi64( (~a).value, 1 ) );
139  const __m256i b ( _mm256_hadd_epi16( low, high ) );
140  const __m256i c ( _mm256_hadd_epi16( b, b ) );
141  const __m256i d ( _mm256_hadd_epi16( c, c ) );
142  const __m256i e ( _mm256_hadd_epi16( d, d ) );
143  const __m128i f ( _mm_add_epi16( _mm256_extracti128_si256( e, 1 )
144  , _mm256_castsi256_si128( e ) ) );
145  return _mm_extract_epi16( f, 0 );
146 #elif BLAZE_AVX2_MODE
147  const __m256i b( _mm256_hadd_epi16( (~a).value, (~a).value ) );
148  const __m256i c( _mm256_hadd_epi16( b, b ) );
149  const __m256i d( _mm256_hadd_epi16( c, c ) );
150  const __m128i e( _mm_add_epi16( _mm256_extracti128_si256( d, 1 )
151  , _mm256_castsi256_si128( d ) ) );
152  return _mm_extract_epi16( e, 0 );
153 #elif BLAZE_SSSE3_MODE
154  const __m128i b( _mm_hadd_epi16( (~a).value, (~a).value ) );
155  const __m128i c( _mm_hadd_epi16( b, b ) );
156  const __m128i d( _mm_hadd_epi16( c, c ) );
157  return _mm_extract_epi16( d, 0 );
158 #elif BLAZE_SSE2_MODE
159  return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7];
160 #else
161  return (~a).value;
162 #endif
163 }
164 //*************************************************************************************************
165 
166 
167 //*************************************************************************************************
174 template< typename T > // Type of the SIMD element
175 BLAZE_ALWAYS_INLINE const ValueType_<T> sum( const SIMDci16<T>& a ) noexcept
176 {
177 #if BLAZE_AVX512BW_MODE
178  return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[ 2] + (~a)[ 3] + (~a)[ 4] + (~a)[ 5] + (~a)[ 6] + (~a)[ 7] +
179  (~a)[8] + (~a)[9] + (~a)[10] + (~a)[11] + (~a)[12] + (~a)[13] + (~a)[14] + (~a)[15] );
180 #elif BLAZE_AVX2_MODE
181  return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
182 #elif BLAZE_SSE2_MODE
183  return complex<int16_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
184 #else
185  return (~a).value;
186 #endif
187 }
188 //*************************************************************************************************
189 
190 
191 
192 
193 //=================================================================================================
194 //
195 // 32-BIT INTEGRAL SIMD TYPES
196 //
197 //=================================================================================================
198 
199 //*************************************************************************************************
206 template< typename T > // Type of the SIMD element
207 BLAZE_ALWAYS_INLINE ValueType_<T> sum( const SIMDi32<T>& a ) noexcept
208 {
209 #if BLAZE_AVX512F_MODE
210  const __m256i low ( _mm512_castsi512_si256( (~a).value ) );
211  const __m256i high( _mm512_extracti64x4_epi64( (~a).value, 1 ) );
212  const __m256i b ( _mm256_hadd_epi32( low, high ) );
213  const __m256i c ( _mm256_hadd_epi32( b, b ) );
214  const __m256i d ( _mm256_hadd_epi32( c, c ) );
215  const __m128i e ( _mm_add_epi32( _mm256_extracti128_si256( d, 1 )
216  , _mm256_castsi256_si128( d ) ) );
217  return _mm_extract_epi32( e, 0 );
218 #elif BLAZE_MIC_MODE
219  return _mm512_reduce_add_epi32( (~a).value );
220 #elif BLAZE_AVX2_MODE
221  const __m256i b( _mm256_hadd_epi32( (~a).value, (~a).value ) );
222  const __m256i c( _mm256_hadd_epi32( b, b ) );
223  const __m128i d( _mm_add_epi32( _mm256_extracti128_si256( c, 1 )
224  , _mm256_castsi256_si128( c ) ) );
225  return _mm_extract_epi32( d, 0 );
226 #elif BLAZE_SSSE3_MODE
227  const __m128i b( _mm_hadd_epi32( (~a).value, (~a).value ) );
228  const __m128i c( _mm_hadd_epi32( b, b ) );
229  return _mm_cvtsi128_si32( c );
230 #elif BLAZE_SSE2_MODE
231  return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3];
232 #else
233  return (~a).value;
234 #endif
235 }
236 //*************************************************************************************************
237 
238 
239 //*************************************************************************************************
246 template< typename T > // Type of the SIMD element
247 BLAZE_ALWAYS_INLINE const ValueType_<T> sum( const SIMDci32<T>& a ) noexcept
248 {
249 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
250  return complex<int32_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7] );
251 #elif BLAZE_AVX2_MODE
252  return complex<int32_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
253 #elif BLAZE_SSE2_MODE
254  return complex<int32_t>( (~a)[0] + (~a)[1] );
255 #else
256  return (~a).value;
257 #endif
258 }
259 //*************************************************************************************************
260 
261 
262 
263 
264 //=================================================================================================
265 //
266 // 64-BIT INTEGRAL SIMD TYPES
267 //
268 //=================================================================================================
269 
270 //*************************************************************************************************
277 template< typename T > // Type of the SIMD element
278 BLAZE_ALWAYS_INLINE ValueType_<T> sum( const SIMDi64<T>& a ) noexcept
279 {
280 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
281  return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] + (~a)[4] + (~a)[5] + (~a)[6] + (~a)[7];
282 #elif BLAZE_AVX2_MODE
283  return (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3];
284 #elif BLAZE_SSE2_MODE
285  return (~a)[0] + (~a)[1];
286 #else
287  return (~a).value;
288 #endif
289 }
290 //*************************************************************************************************
291 
292 
293 //*************************************************************************************************
300 template< typename T > // Type of the SIMD element
301 BLAZE_ALWAYS_INLINE const ValueType_<T> sum( const SIMDci64<T>& a ) noexcept
302 {
303 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
304  return complex<int64_t>( (~a)[0] + (~a)[1] + (~a)[2] + (~a)[3] );
305 #elif BLAZE_AVX2_MODE
306  return complex<int64_t>( (~a)[0] + (~a)[1] );
307 #elif BLAZE_SSE2_MODE
308  return (~a)[0];
309 #else
310  return (~a).value;
311 #endif
312 }
313 //*************************************************************************************************
314 
315 
316 
317 
318 //=================================================================================================
319 //
320 // 32-BIT FLOATING POINT SIMD TYPES
321 //
322 //=================================================================================================
323 
324 //*************************************************************************************************
331 BLAZE_ALWAYS_INLINE float sum( const SIMDfloat& a ) noexcept
332 {
333 #if BLAZE_AVX512F_MODE
334  __m512 b( _mm512_shuffle_f32x4( a.value, a.value, 0b11'10'11'10 ) );
335  const __m512 c( _mm512_add_ps( b, a.value ) );
336  const __m512 d( _mm512_shuffle_f32x4( c, c, 0b01'01'01'01 ) );
337  const __m512 e( _mm512_add_ps( d, c ) );
338  const __m512 f( _mm512_castsi512_ps( _mm512_shuffle_epi32( _mm512_castps_si512( e ), _MM_PERM_BADC ) ) );
339  const __m512 g( _mm512_add_ps( e, f ) );
340  const __m512 h( _mm512_castsi512_ps( _mm512_shuffle_epi32( _mm512_castps_si512( g ), _MM_PERM_CDAB ) ) );
341  b = _mm512_add_ps( g, h );
342  return _mm_cvtss_f32( _mm512_castps512_ps128( b ) );
343 #elif BLAZE_MIC_MODE
344  return _mm512_reduce_add_ps( a.value );
345 #elif BLAZE_AVX_MODE
346  const __m256 b( _mm256_hadd_ps( a.value, a.value ) );
347  const __m256 c( _mm256_hadd_ps( b, b ) );
348  const __m128 d( _mm_add_ps( _mm256_extractf128_ps( c, 1 ), _mm256_castps256_ps128( c ) ) );
349  return _mm_cvtss_f32( d );
350 #elif BLAZE_SSE3_MODE
351  const __m128 b( _mm_hadd_ps( a.value, a.value ) );
352  const __m128 c( _mm_hadd_ps( b, b ) );
353  return _mm_cvtss_f32( c );
354 #elif BLAZE_SSE_MODE
355  return a[0] + a[1] + a[2] + a[3];
356 #else
357  return a.value;
358 #endif
359 }
360 //*************************************************************************************************
361 
362 
363 //*************************************************************************************************
370 BLAZE_ALWAYS_INLINE const complex<float> sum( const SIMDcfloat& a ) noexcept
371 {
372 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
373  return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
374 #elif BLAZE_AVX_MODE
375  return complex<float>( a[0] + a[1] + a[2] + a[3] );
376 #elif BLAZE_SSE_MODE
377  return complex<float>( a[0] + a[1] );
378 #else
379  return a.value;
380 #endif
381 }
382 //*************************************************************************************************
383 
384 
385 
386 
387 //=================================================================================================
388 //
389 // 64-BIT FLOATING POINT SIMD TYPES
390 //
391 //=================================================================================================
392 
393 //*************************************************************************************************
400 BLAZE_ALWAYS_INLINE double sum( const SIMDdouble& a ) noexcept
401 {
402 #if BLAZE_AVX512F_MODE
403  __m512d b( _mm512_shuffle_f64x2( a.value, a.value, 0b11'10'11'10 ) );
404  const __m512d c( _mm512_add_pd( a.value, b ) );
405  const __m512d d( _mm512_permutex_pd( c, 0b01'00'11'10 ) );
406  const __m512d e( _mm512_add_pd( c , d ) );
407  const __m512d f( _mm512_permutex_pd( e, 0b10'11'00'01 ) );
408  b = _mm512_add_pd( e, f );
409  return _mm_cvtsd_f64( _mm512_castpd512_pd128( b ) );
410 #elif BLAZE_MIC_MODE
411  return _mm512_reduce_add_pd( a.value );
412 #elif BLAZE_AVX_MODE
413  const __m256d b( _mm256_hadd_pd( a.value, a.value ) );
414  const __m128d c( _mm_add_pd( _mm256_extractf128_pd( b, 1 ), _mm256_castpd256_pd128( b ) ) );
415  return _mm_cvtsd_f64( c );
416 #elif BLAZE_SSE3_MODE
417  const __m128d b( _mm_hadd_pd( a.value, a.value ) );
418  return _mm_cvtsd_f64( b );
419 #elif BLAZE_SSE2_MODE
420  return a[0] + a[1];
421 #else
422  return a.value;
423 #endif
424 }
425 //*************************************************************************************************
426 
427 
428 //*************************************************************************************************
435 BLAZE_ALWAYS_INLINE const complex<double> sum( const SIMDcdouble& a ) noexcept
436 {
437 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
438  return complex<double>( a[0] + a[1] + a[2] + a[3] );
439 #elif BLAZE_AVX_MODE
440  return complex<double>( a[0] + a[1] );
441 #elif BLAZE_SSE2_MODE
442  return a[0];
443 #else
444  return a.value;
445 #endif
446 }
447 //*************************************************************************************************
448 
449 } // namespace blaze
450 
451 #endif
Header file for auxiliary alias declarations.
SIMD type for 64-bit double precision floating point data values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
typename T::ValueType ValueType_
Alias declaration for nested ValueType type definitions. The ValueType_ alias declaration provides a c...
Definition: Aliases.h:443
Header file for the basic SIMD types.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
SIMD type for 32-bit single precision complex values.
SIMD type for 32-bit single precision floating point data values.
System settings for the SSE mode.
System settings for the inline keywords.