Reduction.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 
47 
48 namespace blaze {
49 
50 //=================================================================================================
51 //
52 // INTRINSIC SUM OPERATION
53 //
54 //=================================================================================================
55 
56 //*************************************************************************************************
// Function body whose header line is not part of this listing. The listing's tooltip index
// gives the signature at the preceding line as
// `BLAZE_ALWAYS_INLINE int16_t sum(const simd_int16_t &a)` and describes it as returning the
// sum of all elements in the 16-bit integral intrinsic vector.
64 {
65 #if BLAZE_AVX2_MODE
// Three rounds of horizontal pairwise adds, then the upper 128-bit half is added to the lower
// half and element 0 of the result is extracted. (_mm256_hadd_epi16 works per 128-bit half
// according to the Intel intrinsics reference, hence the final cross-half add.)
66  const simd_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
67  const simd_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
68  const simd_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
69  const __m128i e = _mm_add_epi16( _mm256_extracti128_si256( d.value, 1 )
70  , _mm256_castsi256_si128( d.value ) );
71  return _mm_extract_epi16( e, 0 );
72 #elif BLAZE_SSSE3_MODE
// Same reduction on a single 128-bit register: three horizontal adds, result read from d[0].
73  const simd_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
74  const simd_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
75  const simd_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
76  return d[0];
77 #elif BLAZE_SSE2_MODE
// No horizontal-add intrinsic is used in this branch: elements 0..7 are added one by one.
78  return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
79 #else
// None of the modes above is enabled: the wrapped value itself is the result.
80  return a.value;
81 #endif
82 }
83 //*************************************************************************************************
84 
85 
86 //*************************************************************************************************
// Function body whose header line is not part of this listing; presumably the sum() overload
// for simd_int32_t (the body builds simd_int32_t temporaries from a.value) -- TODO confirm
// against the original header.
94 {
95 #if BLAZE_MIC_MODE
// Single reduce-add intrinsic over the 512-bit register.
96  return _mm512_reduce_add_epi32( a.value );
97 #elif BLAZE_AVX2_MODE
// Two rounds of horizontal pairwise adds, then the upper 128-bit half is added to the lower
// half and element 0 is extracted.
98  const simd_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
99  const simd_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
100  const __m128i d = _mm_add_epi32( _mm256_extracti128_si256( c.value, 1 )
101  , _mm256_castsi256_si128( c.value ) );
102  return _mm_extract_epi32( d, 0 );
103 #elif BLAZE_SSSE3_MODE
// Two horizontal adds on the 128-bit register; the result is read from c[0].
104  const simd_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
105  const simd_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
106  return c[0];
107 #elif BLAZE_SSE2_MODE
// Element-wise fallback: elements 0..3 are added one by one.
108  return a[0] + a[1] + a[2] + a[3];
109 #else
// None of the modes above is enabled: the wrapped value itself is the result.
110  return a.value;
111 #endif
112 }
113 //*************************************************************************************************
114 
115 
116 //*************************************************************************************************
// Function body whose header line is not part of this listing; presumably the sum() overload
// for the 64-bit integral intrinsic type (the MIC branch uses the epi64 reduce-add) -- TODO
// confirm against the original header.
124 {
125 #if BLAZE_MIC_MODE
// Single reduce-add intrinsic over the 512-bit register.
126  return _mm512_reduce_add_epi64( a.value );
127 #elif BLAZE_AVX2_MODE
// No horizontal-add intrinsic is used here: elements 0..3 are added one by one.
128  return a[0] + a[1] + a[2] + a[3];
129 #elif BLAZE_SSE2_MODE
// Elements 0 and 1 are added.
130  return a[0] + a[1];
131 #else
// None of the modes above is enabled: the wrapped value itself is the result.
132  return a.value;
133 #endif
134 }
135 //*************************************************************************************************
136 
137 
138 //*************************************************************************************************
// Function body whose header line is not part of this listing; presumably the sum() overload
// for simd_float_t (the body builds simd_float_t temporaries from a.value) -- TODO confirm
// against the original header.
146 {
147 #if BLAZE_MIC_MODE
// Single reduce-add intrinsic over the 512-bit register.
148  return _mm512_reduce_add_ps( a.value );
149 #elif BLAZE_AVX_MODE
// Two rounds of horizontal pairwise adds, then the upper 128-bit half is added to the lower
// half; the first float of the result is returned.
150  const simd_float_t b( _mm256_hadd_ps( a.value, a.value ) );
151  const simd_float_t c( _mm256_hadd_ps( b.value, b.value ) );
152  const __m128 d = _mm_add_ps( _mm256_extractf128_ps( c.value, 1 )
153  , _mm256_castps256_ps128( c.value ) );
// NOTE(review): the first lane is read by reinterpreting the address of the __m128 as a
// float pointer; _mm_cvtss_f32( d ) would express the same read without a pointer cast.
154  return *reinterpret_cast<const float*>( &d );
155 #elif BLAZE_SSE3_MODE
// Two horizontal adds on the 128-bit register; the result is read from c[0].
156  const simd_float_t b( _mm_hadd_ps( a.value, a.value ) );
157  const simd_float_t c( _mm_hadd_ps( b.value, b.value ) );
158  return c[0];
159 #elif BLAZE_SSE_MODE
// Element-wise fallback: elements 0..3 are added one by one, left to right.
160  return a[0] + a[1] + a[2] + a[3];
161 #else
// None of the modes above is enabled: the wrapped value itself is the result.
162  return a.value;
163 #endif
164 }
165 //*************************************************************************************************
166 
167 
168 //*************************************************************************************************
// Function body whose header line is not part of this listing; presumably the sum() overload
// for simd_double_t (the body builds simd_double_t temporaries from a.value) -- TODO confirm
// against the original header.
176 {
177 #if BLAZE_MIC_MODE
// Single reduce-add intrinsic over the 512-bit register.
178  return _mm512_reduce_add_pd( a.value );
179 #elif BLAZE_AVX_MODE
// One horizontal pairwise add, then the upper 128-bit half is added to the lower half; the
// first double of the result is returned.
180  const simd_double_t b( _mm256_hadd_pd( a.value, a.value ) );
181  const __m128d c = _mm_add_pd( _mm256_extractf128_pd( b.value, 1 )
182  , _mm256_castpd256_pd128( b.value ) );
// NOTE(review): the first lane is read by reinterpreting the address of the __m128d as a
// double pointer; _mm_cvtsd_f64( c ) would express the same read without a pointer cast.
183  return *reinterpret_cast<const double*>( &c );
184 #elif BLAZE_SSE3_MODE
// One horizontal add on the 128-bit register; the result is read from b[0].
185  const simd_double_t b( _mm_hadd_pd( a.value, a.value ) );
186  return b[0];
187 #elif BLAZE_SSE2_MODE
// Element-wise fallback: elements 0 and 1 are added.
188  return a[0] + a[1];
189 #else
// None of the modes above is enabled: the wrapped value itself is the result.
190  return a.value;
191 #endif
192 }
193 //*************************************************************************************************
194 
195 
196 //*************************************************************************************************
203 BLAZE_ALWAYS_INLINE complex<int8_t> sum( const simd_cint8_t& a )
204 {
205 #if BLAZE_AVX2_MODE
206  return complex<int8_t>( a[0] + a[1] + a[ 2] + a[ 3] + a[ 4] + a[ 5] + a[ 6] + a[ 7] +
207  a[8] + a[9] + a[10] + a[11] + a[12] + a[13] + a[14] + a[15] );
208 #elif BLAZE_SSE2_MODE
209  return complex<int8_t>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
210 #else
211  return a.value;
212 #endif
213 }
214 //*************************************************************************************************
215 
216 
217 //*************************************************************************************************
224 BLAZE_ALWAYS_INLINE complex<int16_t> sum( const simd_cint16_t& a )
225 {
226 #if BLAZE_AVX2_MODE
227  return complex<int16_t>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
228 #elif BLAZE_SSE2_MODE
229  return complex<int16_t>( a[0] + a[1] + a[2] + a[3] );
230 #else
231  return a.value;
232 #endif
233 }
234 //*************************************************************************************************
235 
236 
237 //*************************************************************************************************
244 BLAZE_ALWAYS_INLINE complex<int32_t> sum( const simd_cint32_t& a )
245 {
246 #if BLAZE_AVX2_MODE
247  return complex<int32_t>( a[0] + a[1] + a[2] + a[3] );
248 #elif BLAZE_SSE2_MODE
249  return complex<int32_t>( a[0] + a[1] );
250 #else
251  return a.value;
252 #endif
253 }
254 //*************************************************************************************************
255 
256 
257 //*************************************************************************************************
264 BLAZE_ALWAYS_INLINE complex<int64_t> sum( const simd_cint64_t& a )
265 {
266 #if BLAZE_AVX2_MODE
267  return complex<int64_t>( a[0] + a[1] );
268 #elif BLAZE_SSE2_MODE
269  return a[0];
270 #else
271  return a.value;
272 #endif
273 }
274 //*************************************************************************************************
275 
276 
277 //*************************************************************************************************
284 BLAZE_ALWAYS_INLINE complex<float> sum( const simd_cfloat_t& a )
285 {
286 #if BLAZE_MIC_MODE
287  return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
288 #elif BLAZE_AVX_MODE
289  return complex<float>( a[0] + a[1] + a[2] + a[3] );
290 #elif BLAZE_SSE_MODE
291  return complex<float>( a[0] + a[1] );
292 #else
293  return a.value;
294 #endif
295 }
296 //*************************************************************************************************
297 
298 
299 //*************************************************************************************************
306 BLAZE_ALWAYS_INLINE complex<double> sum( const simd_cdouble_t& a )
307 {
308 #if BLAZE_MIC_MODE
309  return complex<double>( a[0] + a[1] + a[2] + a[3] );
310 #elif BLAZE_AVX_MODE
311  return complex<double>( a[0] + a[1] );
312 #elif BLAZE_SSE2_MODE
313  return a[0];
314 #else
315  return a.value;
316 #endif
317 }
318 //*************************************************************************************************
319 
320 } // namespace blaze
321 
322 #endif
Intrinsic type for 16-bit integral data values.
16-bit signed integer type of the Blaze library.
Intrinsic type for 64-bit double precision floating point data values.
Intrinsic type for 8-bit integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the basic intrinsic types.
Intrinsic type for 64-bit integral complex values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit integral data values.
Intrinsic type for 32-bit integral complex values.
Intrinsic type for 64-bit double precision complex values.
BLAZE_ALWAYS_INLINE int16_t sum(const simd_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Intrinsic type for 32-bit single precision complex values.
Intrinsic type for 16-bit integral complex values.
System settings for the SSE mode.
64-bit signed integer type of the Blaze library.
System settings for the inline keywords.
32-bit signed integer type of the Blaze library.
Intrinsic type for 32-bit integral data values.