Blaze 3.9
Sum.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_SIMD_SUM_H_
36#define _BLAZE_MATH_SIMD_SUM_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <blaze/math/Aliases.h>
45#include <blaze/system/Inline.h>
47
48
49namespace blaze {
50
51//=================================================================================================
52//
53// 8-BIT INTEGRAL SIMD TYPES
54//
55//=================================================================================================
56
57//*************************************************************************************************
64template< typename T > // Type of the SIMD element
65BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi8<T>& a ) noexcept
66{
67#if BLAZE_AVX512BW_MODE
68 return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
69 (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
70 (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
71 (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31] +
72 (*a)[32] + (*a)[33] + (*a)[34] + (*a)[35] + (*a)[36] + (*a)[37] + (*a)[38] + (*a)[39] +
73 (*a)[40] + (*a)[41] + (*a)[42] + (*a)[43] + (*a)[44] + (*a)[45] + (*a)[46] + (*a)[47] +
74 (*a)[48] + (*a)[49] + (*a)[50] + (*a)[51] + (*a)[52] + (*a)[53] + (*a)[54] + (*a)[55] +
75 (*a)[56] + (*a)[57] + (*a)[58] + (*a)[59] + (*a)[60] + (*a)[61] + (*a)[62] + (*a)[63];
76#elif BLAZE_AVX2_MODE
77 return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
78 (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
79 (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
80 (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31];
81#elif BLAZE_SSE2_MODE
82 return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
83 (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15];
84#else
85 return (*a).value;
86#endif
87}
88//*************************************************************************************************
89
90
91//*************************************************************************************************
98template< typename T > // Type of the SIMD element
99BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci8<T>& a ) noexcept
100{
101#if BLAZE_AVX512BW_MODE
102 return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
103 (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
104 (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
105 (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31];
106#elif BLAZE_AVX2_MODE
107 return (*a)[0] + (*a)[1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
108 (*a)[8] + (*a)[9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15];
109#elif BLAZE_SSE2_MODE
110 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
111#else
112 return (*a).value;
113#endif
114}
115//*************************************************************************************************
116
117
118
119
120//=================================================================================================
121//
122// 16-BIT INTEGRAL SIMD TYPES
123//
124//=================================================================================================
125
126//*************************************************************************************************
133template< typename T > // Type of the SIMD element
134BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi16<T>& a ) noexcept
135{
136#if BLAZE_AVX512BW_MODE
137 const __m256i low ( _mm512_castsi512_si256( (*a).value ) );
138 const __m256i high( _mm512_extracti64x4_epi64( (*a).value, 1 ) );
139 const __m256i b ( _mm256_hadd_epi16( low, high ) );
140 const __m256i c ( _mm256_hadd_epi16( b, b ) );
141 const __m256i d ( _mm256_hadd_epi16( c, c ) );
142 const __m256i e ( _mm256_hadd_epi16( d, d ) );
143 const __m128i f ( _mm_add_epi16( _mm256_extracti128_si256( e, 1 )
144 , _mm256_castsi256_si128( e ) ) );
145 return _mm_extract_epi16( f, 0 );
146#elif BLAZE_AVX2_MODE
147 const __m256i b( _mm256_hadd_epi16( (*a).value, (*a).value ) );
148 const __m256i c( _mm256_hadd_epi16( b, b ) );
149 const __m256i d( _mm256_hadd_epi16( c, c ) );
150 const __m128i e( _mm_add_epi16( _mm256_extracti128_si256( d, 1 )
151 , _mm256_castsi256_si128( d ) ) );
152 return _mm_extract_epi16( e, 0 );
153#elif BLAZE_SSSE3_MODE
154 const __m128i b( _mm_hadd_epi16( (*a).value, (*a).value ) );
155 const __m128i c( _mm_hadd_epi16( b, b ) );
156 const __m128i d( _mm_hadd_epi16( c, c ) );
157 return _mm_extract_epi16( d, 0 );
158#elif BLAZE_SSE2_MODE
159 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
160#else
161 return (*a).value;
162#endif
163}
164//*************************************************************************************************
165
166
167//*************************************************************************************************
174template< typename T > // Type of the SIMD element
175BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci16<T>& a ) noexcept
176{
177#if BLAZE_AVX512BW_MODE
178 return (*a)[0] + (*a)[1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
179 (*a)[8] + (*a)[9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15];
180#elif BLAZE_AVX2_MODE
181 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
182#elif BLAZE_SSE2_MODE
183 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3];
184#else
185 return (*a).value;
186#endif
187}
188//*************************************************************************************************
189
190
191
192
193//=================================================================================================
194//
195// 32-BIT INTEGRAL SIMD TYPES
196//
197//=================================================================================================
198
199//*************************************************************************************************
206template< typename T > // Type of the SIMD element
207BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi32<T>& a ) noexcept
208{
209#if BLAZE_AVX512F_MODE
210 const __m256i low ( _mm512_castsi512_si256( (*a).value ) );
211 const __m256i high( _mm512_extracti64x4_epi64( (*a).value, 1 ) );
212 const __m256i b ( _mm256_hadd_epi32( low, high ) );
213 const __m256i c ( _mm256_hadd_epi32( b, b ) );
214 const __m256i d ( _mm256_hadd_epi32( c, c ) );
215 const __m128i e ( _mm_add_epi32( _mm256_extracti128_si256( d, 1 )
216 , _mm256_castsi256_si128( d ) ) );
217 return _mm_extract_epi32( e, 0 );
218#elif BLAZE_MIC_MODE
219 return _mm512_reduce_add_epi32( (*a).value );
220#elif BLAZE_AVX2_MODE
221 const __m256i b( _mm256_hadd_epi32( (*a).value, (*a).value ) );
222 const __m256i c( _mm256_hadd_epi32( b, b ) );
223 const __m128i d( _mm_add_epi32( _mm256_extracti128_si256( c, 1 )
224 , _mm256_castsi256_si128( c ) ) );
225 return _mm_extract_epi32( d, 0 );
226#elif BLAZE_SSSE3_MODE
227 const __m128i b( _mm_hadd_epi32( (*a).value, (*a).value ) );
228 return _mm_cvtsi128_si32( _mm_hadd_epi32( b, b ) );
229#elif BLAZE_SSE2_MODE
230 const __m128i b( _mm_add_epi32( (*a).value, _mm_shuffle_epi32( (*a).value, 0x4E ) ) );
231 return _mm_cvtsi128_si32( _mm_add_epi32( b, _mm_shuffle_epi32( b, 0xB1 ) ) );
232#else
233 return (*a).value;
234#endif
235}
236//*************************************************************************************************
237
238
239//*************************************************************************************************
246template< typename T > // Type of the SIMD element
247BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci32<T>& a ) noexcept
248{
249#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
250 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
251#elif BLAZE_AVX2_MODE
252 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3];
253#elif BLAZE_SSE2_MODE
254 return (*a)[0] + (*a)[1];
255#else
256 return (*a).value;
257#endif
258}
259//*************************************************************************************************
260
261
262
263
264//=================================================================================================
265//
266// 64-BIT INTEGRAL SIMD TYPES
267//
268//=================================================================================================
269
270//*************************************************************************************************
277template< typename T > // Type of the SIMD element
278BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi64<T>& a ) noexcept
279{
280#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
281 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
282#elif BLAZE_AVX2_MODE
283 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3];
284#elif BLAZE_SSE2_MODE
285 return (*a)[0] + (*a)[1];
286#else
287 return (*a).value;
288#endif
289}
290//*************************************************************************************************
291
292
293//*************************************************************************************************
300template< typename T > // Type of the SIMD element
301BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci64<T>& a ) noexcept
302{
303#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
304 return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3];
305#elif BLAZE_AVX2_MODE
306 return (*a)[0] + (*a)[1];
307#elif BLAZE_SSE2_MODE
308 return (*a)[0];
309#else
310 return (*a).value;
311#endif
312}
313//*************************************************************************************************
314
315
316
317
318//=================================================================================================
319//
320// 32-BIT FLOATING POINT SIMD TYPES
321//
322//=================================================================================================
323
324//*************************************************************************************************
331BLAZE_ALWAYS_INLINE float sum( const SIMDfloat& a ) noexcept
332{
333#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
334 return _mm512_reduce_add_ps( a.value );
335#elif BLAZE_AVX_MODE
336 const __m128 b( _mm_add_ps( _mm256_extractf128_ps( a.value, 1 ), _mm256_castps256_ps128( a.value ) ) );
337 const __m128 c( _mm_add_ps( b, _mm_movehl_ps( b, b ) ) );
338 return _mm_cvtss_f32( _mm_add_ss( c, _mm_shuffle_ps( c, c, 1 ) ) );
339#elif BLAZE_SSE3_MODE
340 const __m128 b( _mm_add_ps( a.value, _mm_movehl_ps( a.value, a.value ) ) );
341 return _mm_cvtss_f32( _mm_add_ss( b, _mm_shuffle_ps( b, b, 1 ) ) );
342#elif BLAZE_SSE_MODE
343 const __m128 b( _mm_add_ps( a.value, _mm_movehl_ps( a.value, a.value ) ) );
344 return _mm_cvtss_f32( _mm_add_ss( b, _mm_shuffle_ps( b, b, 1 ) ) );
345#else
346 return a.value;
347#endif
348}
349//*************************************************************************************************
350
351
352//*************************************************************************************************
359BLAZE_ALWAYS_INLINE const complex<float> sum( const SIMDcfloat& a ) noexcept
360{
361#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
362 return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
363#elif BLAZE_AVX_MODE
364 return a[0] + a[1] + a[2] + a[3];
365#elif BLAZE_SSE_MODE
366 return a[0] + a[1];
367#else
368 return a.value;
369#endif
370}
371//*************************************************************************************************
372
373
374
375
376//=================================================================================================
377//
378// 64-BIT FLOATING POINT SIMD TYPES
379//
380//=================================================================================================
381
382//*************************************************************************************************
389BLAZE_ALWAYS_INLINE double sum( const SIMDdouble& a ) noexcept
390{
391#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
392 return _mm512_reduce_add_pd( a.value );
393#elif BLAZE_AVX_MODE
394 const __m128d b( _mm_add_pd( _mm256_castpd256_pd128( a.value ), _mm256_extractf128_pd( a.value, 1 ) ) );
395 return _mm_cvtsd_f64( _mm_add_sd( b, _mm_unpackhi_pd( b, b ) ) );
396#elif BLAZE_SSE2_MODE
397 return _mm_cvtsd_f64( _mm_add_sd( a.value, _mm_unpackhi_pd( a.value, a.value ) ) );
398#else
399 return a.value;
400#endif
401}
402//*************************************************************************************************
403
404
405//*************************************************************************************************
412BLAZE_ALWAYS_INLINE const complex<double> sum( const SIMDcdouble& a ) noexcept
413{
414#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
415 return a[0] + a[1] + a[2] + a[3];
416#elif BLAZE_AVX_MODE
417 return a[0] + a[1];
418#elif BLAZE_SSE2_MODE
419 return a[0];
420#else
421 return a.value;
422#endif
423}
424//*************************************************************************************************
425
426} // namespace blaze
427
428#endif
Header file for auxiliary alias declarations.
typename T::ValueType ValueType_t
Alias declaration for nested ValueType type definitions.
Definition: Aliases.h:570
Header file for the basic SIMD types.
SIMD type for 64-bit double precision complex values.
SIMD type for 32-bit single precision complex values.
SIMD type for 64-bit double precision floating point data values.
SIMD type for 32-bit single precision floating point data values.
BLAZE_ALWAYS_INLINE const complex< double > sum(const SIMDcdouble &a) noexcept
Returns the sum of all elements in the double precision complex SIMD vector.
Definition: Sum.h:412
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
System settings for the inline keywords.
System settings for the SSE mode.