Reduction.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_INTRINSICS_REDUCTION_H_
36 #define _BLAZE_MATH_INTRINSICS_REDUCTION_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 
47 
48 namespace blaze {
49 
50 //=================================================================================================
51 //
52 // INTRINSIC SUM OPERATION
53 //
54 //=================================================================================================
55 
56 //*************************************************************************************************
// Body of the 16-bit integer sum() overload. The signature line is not part of this listing; the
// cross-reference text at the end of the listing gives it as
// "BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)".
// Exactly one of the preprocessor branches below is compiled, depending on the active mode macro.
64 {
65 #if BLAZE_AVX2_MODE
 // Three rounds of pairwise horizontal adds. _mm256_hadd_epi16 pairs elements inside each 128-bit
 // half separately, so after the third round each half holds the sum of its own eight elements.
66  const sse_int16_t b( _mm256_hadd_epi16( a.value, a.value ) );
67  const sse_int16_t c( _mm256_hadd_epi16( b.value, b.value ) );
68  const sse_int16_t d( _mm256_hadd_epi16( c.value, c.value ) );
 // Add the upper 128-bit half onto the lower half to combine the two partial sums.
69  const __m128i e = _mm_add_epi16( _mm256_extracti128_si256( d.value, 1 )
70  , _mm256_castsi256_si128( d.value ) );
 // Element 0 of the combined vector is the result.
71  return _mm_extract_epi16( e, 0 );
72 #elif BLAZE_SSSE3_MODE
 // 128-bit variant: three horizontal adds reduce the eight elements, element 0 is returned.
73  const sse_int16_t b( _mm_hadd_epi16( a.value, a.value ) );
74  const sse_int16_t c( _mm_hadd_epi16( b.value, b.value ) );
75  const sse_int16_t d( _mm_hadd_epi16( c.value, c.value ) );
76  return d[0];
77 #elif BLAZE_SSE2_MODE
 // No horizontal add is used in this mode; the eight elements are added one by one.
78  return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
79 #else
 // Fallback: the wrapped value is returned unchanged.
 // NOTE(review): presumably a.value is a plain scalar in this mode - confirm in BasicTypes.h.
80  return a.value;
81 #endif
82 }
83 //*************************************************************************************************
84 
85 
86 //*************************************************************************************************
// Body of a sum() overload working on an sse_int32_t operand named a (see the local declarations
// below). The signature line is not part of this listing.
// NOTE(review): the return type is presumably int32_t - confirm against the original header.
// Exactly one of the preprocessor branches below is compiled, depending on the active mode macro.
94 {
95 #if BLAZE_MIC_MODE
 // The whole reduction is delegated to a single reduce-add intrinsic.
96  return _mm512_reduce_add_epi32( a.value );
97 #elif BLAZE_AVX2_MODE
 // Two rounds of pairwise horizontal adds. _mm256_hadd_epi32 pairs elements inside each 128-bit
 // half separately, so after the second round each half holds the sum of its own four elements.
98  const sse_int32_t b( _mm256_hadd_epi32( a.value, a.value ) );
99  const sse_int32_t c( _mm256_hadd_epi32( b.value, b.value ) );
 // Add the upper 128-bit half onto the lower half to combine the two partial sums.
100  const __m128i d = _mm_add_epi32( _mm256_extracti128_si256( c.value, 1 )
101  , _mm256_castsi256_si128( c.value ) );
 // Element 0 of the combined vector is the result.
102  return _mm_extract_epi32( d, 0 );
103 #elif BLAZE_SSSE3_MODE
 // 128-bit variant: two horizontal adds reduce the four elements, element 0 is returned.
104  const sse_int32_t b( _mm_hadd_epi32( a.value, a.value ) );
105  const sse_int32_t c( _mm_hadd_epi32( b.value, b.value ) );
106  return c[0];
107 #elif BLAZE_SSE2_MODE
 // No horizontal add is used in this mode; the four elements are added one by one.
108  return a[0] + a[1] + a[2] + a[3];
109 #else
 // Fallback: the wrapped value is returned unchanged.
 // NOTE(review): presumably a.value is a plain scalar in this mode - confirm in BasicTypes.h.
110  return a.value;
111 #endif
112 }
113 //*************************************************************************************************
114 
115 
116 //*************************************************************************************************
// Body of a sum() overload whose signature line is not part of this listing.
// NOTE(review): the 64-bit reduce-add intrinsic in the first branch suggests this is the
// sse_int64_t overload returning int64_t - confirm against the original header.
// Exactly one of the preprocessor branches below is compiled, depending on the active mode macro.
124 {
125 #if BLAZE_MIC_MODE
 // The whole reduction is delegated to a single reduce-add intrinsic.
126  return _mm512_reduce_add_epi64( a.value );
127 #elif BLAZE_AVX2_MODE
 // Four elements are added one by one through operator[]; no vector instruction is used here.
128  return a[0] + a[1] + a[2] + a[3];
129 #elif BLAZE_SSE2_MODE
 // Two elements are added through operator[].
130  return a[0] + a[1];
131 #else
 // Fallback: the wrapped value is returned unchanged.
 // NOTE(review): presumably a.value is a plain scalar in this mode - confirm in BasicTypes.h.
132  return a.value;
133 #endif
134 }
135 //*************************************************************************************************
136 
137 
138 //*************************************************************************************************
// Body of a sum() overload working on an sse_float_t operand named a (see the local declarations
// below). The signature line is not part of this listing.
// NOTE(review): the return type is presumably float - confirm against the original header.
// Exactly one of the preprocessor branches below is compiled, depending on the active mode macro.
146 {
147 #if BLAZE_MIC_MODE
 // The whole reduction is delegated to a single reduce-add intrinsic.
148  return _mm512_reduce_add_ps( a.value );
149 #elif BLAZE_AVX_MODE
 // Two rounds of pairwise horizontal adds. _mm256_hadd_ps pairs elements inside each 128-bit
 // half separately, so after the second round each half holds the sum of its own four elements.
150  const sse_float_t b( _mm256_hadd_ps( a.value, a.value ) );
151  const sse_float_t c( _mm256_hadd_ps( b.value, b.value ) );
 // Add the upper 128-bit half onto the lower half to combine the two partial sums.
152  const __m128 d = _mm_add_ps( _mm256_extractf128_ps( c.value, 1 )
153  , _mm256_castps256_ps128( c.value ) );
 // The first float of d is read through a pointer cast.
 // NOTE(review): _mm_cvtss_f32( d ) yields the same element without the pointer cast - consider it.
154  return *reinterpret_cast<const float*>( &d );
155 #elif BLAZE_SSE3_MODE
 // 128-bit variant: two horizontal adds reduce the four elements, element 0 is returned.
156  const sse_float_t b( _mm_hadd_ps( a.value, a.value ) );
157  const sse_float_t c( _mm_hadd_ps( b.value, b.value ) );
158  return c[0];
159 #elif BLAZE_SSE_MODE
 // No horizontal add is used in this mode; the four elements are added one by one.
160  return a[0] + a[1] + a[2] + a[3];
161 #else
 // Fallback: the wrapped value is returned unchanged.
 // NOTE(review): presumably a.value is a plain scalar in this mode - confirm in BasicTypes.h.
162  return a.value;
163 #endif
164 }
165 //*************************************************************************************************
166 
167 
168 //*************************************************************************************************
// Body of a sum() overload working on an sse_double_t operand named a (see the local declarations
// below). The signature line is not part of this listing.
// NOTE(review): the return type is presumably double - confirm against the original header.
// Exactly one of the preprocessor branches below is compiled, depending on the active mode macro.
176 {
177 #if BLAZE_MIC_MODE
 // The whole reduction is delegated to a single reduce-add intrinsic.
178  return _mm512_reduce_add_pd( a.value );
179 #elif BLAZE_AVX_MODE
 // One pairwise horizontal add. _mm256_hadd_pd pairs elements inside each 128-bit half
 // separately, so each half then holds the sum of its own two elements.
180  const sse_double_t b( _mm256_hadd_pd( a.value, a.value ) );
 // Add the upper 128-bit half onto the lower half to combine the two partial sums.
181  const __m128d c = _mm_add_pd( _mm256_extractf128_pd( b.value, 1 )
182  , _mm256_castpd256_pd128( b.value ) );
 // The first double of c is read through a pointer cast.
 // NOTE(review): _mm_cvtsd_f64( c ) yields the same element without the pointer cast - consider it.
183  return *reinterpret_cast<const double*>( &c );
184 #elif BLAZE_SSE3_MODE
 // 128-bit variant: a single horizontal add reduces the two elements, element 0 is returned.
185  const sse_double_t b( _mm_hadd_pd( a.value, a.value ) );
186  return b[0];
187 #elif BLAZE_SSE2_MODE
 // No horizontal add is used in this mode; the two elements are added directly.
188  return a[0] + a[1];
189 #else
 // Fallback: the wrapped value is returned unchanged.
 // NOTE(review): presumably a.value is a plain scalar in this mode - confirm in BasicTypes.h.
190  return a.value;
191 #endif
192 }
193 //*************************************************************************************************
194 
195 
196 //*************************************************************************************************
203 BLAZE_ALWAYS_INLINE complex<float> sum( const sse_cfloat_t& a )
204 {
205 #if BLAZE_MIC_MODE
206  return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
207 #elif BLAZE_AVX_MODE
208  return complex<float>( a[0] + a[1] + a[2] + a[3] );
209 #elif BLAZE_SSE_MODE
210  return complex<float>( a[0] + a[1] );
211 #else
212  return a.value;
213 #endif
214 }
215 //*************************************************************************************************
216 
217 
218 //*************************************************************************************************
225 BLAZE_ALWAYS_INLINE complex<double> sum( const sse_cdouble_t& a )
226 {
227 #if BLAZE_MIC_MODE
228  return complex<double>( a[0] + a[1] + a[2] + a[3] );
229 #elif BLAZE_AVX_MODE
230  return complex<double>( a[0] + a[1] );
231 #elif BLAZE_SSE2_MODE
232  return a[0];
233 #else
234  return a.value;
235 #endif
236 }
237 //*************************************************************************************************
238 
239 } // namespace blaze
240 
241 #endif
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Intrinsic type for 32-bit single precision complex values.
16-bit signed integer type of the Blaze library.
Intrinsic type for 64-bit integral data values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Intrinsic type for 16-bit integral data values.
Intrinsic type for 32-bit single precision floating point data values.
Intrinsic type for 64-bit double precision complex values.
Intrinsic type for 64-bit double precision floating point data values.
Header file for the basic intrinsic types.
System settings for the SSE mode.
64-bit signed integer type of the Blaze library.
Intrinsic type for 32-bit integral data values.
System settings for the inline keywords.
32-bit signed integer type of the Blaze library.