DVecReduceExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
49 #include <blaze/math/SIMD.h>
54 #include <blaze/system/Compiler.h>
55 #include <blaze/util/Assert.h>
56 #include <blaze/util/DisableIf.h>
57 #include <blaze/util/EnableIf.h>
59 #include <blaze/util/Types.h>
62 
63 
64 namespace blaze {
65 
66 //=================================================================================================
67 //
68 // CLASS DEFINITION
69 //
70 //=================================================================================================
71 
72 //*************************************************************************************************
// Compile-time helper that tells whether the reduction of a dense vector of type VT with the
// operation OP qualifies for the SIMD-based dvecreduce() overload. The two generic dvecreduce()
// overloads in this file are constrained on DVecReduceExprHelper<VT,OP>::value via
// DisableIf_t and EnableIf_t, respectively.
77 template< typename VT // Type of the dense vector
78  , typename OP > // Type of the reduction operation
79 struct DVecReduceExprHelper
80 {
81  //**Type definitions****************************************************************************
  // Composite type of the vector operand with any reference qualifier stripped.
83  using CT = RemoveReference_t< CompositeType_t<VT> >;
84 
  // Element type of the composite type.
86  using ET = ElementType_t<CT>;
87  //**********************************************************************************************
88 
89  //**********************************************************************************************
  // True only if the composite type reports CT::simdEnabled AND the operation qualifies as well.
  // For the operation, GetSIMDEnabled<OP,ET,ET> is consulted when HasSIMDEnabled_v<OP> holds,
  // otherwise HasLoad<OP> is used.
  // NOTE(review): presumably HasLoad<OP> detects an OP::load() member and GetSIMDEnabled queries
  // OP's own SIMD flag for the element type pair; neither trait is defined in this file - confirm.
90  static constexpr bool value =
91  ( CT::simdEnabled &&
92  If_t< HasSIMDEnabled_v<OP>, GetSIMDEnabled<OP,ET,ET>, HasLoad<OP> >::value );
93  //**********************************************************************************************
94 };
96 //*************************************************************************************************
97 
98 
99 
100 
101 //=================================================================================================
102 //
103 // GLOBAL FUNCTIONS
104 //
105 //=================================================================================================
106 
107 //*************************************************************************************************
120 template< typename VT // Type of the dense vector
121  , bool TF // Transpose flag
122  , typename OP > // Type of the reduction operation
123 inline auto dvecreduce( const DenseVector<VT,TF>& dv, OP op )
124  -> DisableIf_t< DVecReduceExprHelper<VT,OP>::value, ElementType_t<VT> >
125 {
126  using CT = CompositeType_t<VT>;
127  using ET = ElementType_t<VT>;
128 
129  const size_t N( (~dv).size() );
130 
131  if( N == 0UL ) return ET{};
132  if( N == 1UL ) return (~dv)[0UL];
133 
134  CT tmp( ~dv );
135 
136  BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
137 
138  ET redux1( tmp[0UL] );
139  ET redux2( tmp[1UL] );
140  size_t i( 2UL );
141 
142  for( ; (i+4UL) <= N; i+=4UL ) {
143  redux1 = op( op( redux1, tmp[i ] ), tmp[i+1UL] );
144  redux2 = op( op( redux2, tmp[i+2UL] ), tmp[i+3UL] );
145  }
146  for( ; (i+2UL) <= N; i+=2UL ) {
147  redux1 = op( redux1, tmp[i ] );
148  redux2 = op( redux2, tmp[i+1UL] );
149  }
150  for( ; i<N; ++i ) {
151  redux1 = op( redux1, tmp[i] );
152  }
153 
154  return op( redux1, redux2 );
155 }
157 //*************************************************************************************************
158 
159 
160 //*************************************************************************************************
173 template< typename VT // Type of the dense vector
174  , bool TF // Transpose flag
175  , typename OP > // Type of the reduction operation
176 inline auto dvecreduce( const DenseVector<VT,TF>& dv, OP op )
177  -> EnableIf_t< DVecReduceExprHelper<VT,OP>::value, ElementType_t<VT> >
178 {
179  using CT = CompositeType_t<VT>;
180  using ET = ElementType_t<VT>;
181 
182  const size_t N( (~dv).size() );
183 
184  if( N == 0UL ) return ET{};
185 
186  CT tmp( ~dv );
187 
188  BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
189 
190  constexpr size_t SIMDSIZE = SIMDTrait<ET>::size;
191 
192  ET redux{};
193 
194  if( N >= SIMDSIZE )
195  {
196  const size_t ipos( N & size_t(-SIMDSIZE) );
197  BLAZE_INTERNAL_ASSERT( ( N - ( N % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
198 
199  SIMDTrait_t<ET> xmm1( tmp.load(0UL) );
200 
201  if( N >= SIMDSIZE*2UL )
202  {
203  SIMDTrait_t<ET> xmm2( tmp.load(SIMDSIZE) );
204  size_t i( SIMDSIZE*2UL );
205 
206  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
207  xmm1 = op( xmm1, tmp.load(i ) );
208  xmm2 = op( xmm2, tmp.load(i+SIMDSIZE) );
209  }
210  for( ; i<ipos; i+=SIMDSIZE ) {
211  xmm1 = op( xmm1, tmp.load(i) );
212  }
213 
214  xmm1 = op( xmm1, xmm2 );
215  }
216 
217  redux = reduce( xmm1, op );
218 
219  for( size_t i=ipos; i<N; ++i ) {
220  redux = op( redux, tmp[i] );
221  }
222  }
223  else {
224  redux = tmp[0UL];
225  for( size_t i=1UL; i<N; ++i ) {
226  redux = op( redux, tmp[i] );
227  }
228  }
229 
230  return redux;
231 }
233 //*************************************************************************************************
234 
235 
236 //*************************************************************************************************
// SIMD-based summation of a dense vector (overload for the Add reduction operation).
//
// Constrained with EnableIf_t on DVecReduceExprHelper<VT,Add>::value. Returns ET{} for an empty
// vector, otherwise the sum of all elements accumulated via operator+= on SIMD values and on
// individual elements.
//
// Three strategies are used, selected by the compile-time flag 'remainder' and the size N:
//  1. no remainder handling required and BLAZE_CLANG_COMPILER is zero: SIMD loop over the whole
//     vector without a scalar tail;
//  2. otherwise, if no remainder handling is required or N >= SIMDSIZE: SIMD loop up to ipos
//     followed by a scalar tail when 'remainder' is true;
//  3. otherwise (N < SIMDSIZE and 'remainder' is true): purely element-wise summation.
248 template< typename VT // Type of the dense vector
249  , bool TF > // Transpose flag
250 inline auto dvecreduce( const DenseVector<VT,TF>& dv, Add /*op*/ )
251  -> EnableIf_t< DVecReduceExprHelper<VT,Add>::value, ElementType_t<VT> >
252 {
253  using CT = CompositeType_t<VT>;
254  using ET = ElementType_t<VT>;
255 
256  const size_t N( (~dv).size() );
257 
258  if( N == 0UL ) return ET{};
259 
260  CT tmp( ~dv );
261 
262  BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
263 
  // 'remainder' is true when padding is disabled (usePadding) or the composite type is not padded;
  // in that case trailing elements beyond the last whole SIMD load are processed individually.
264  constexpr bool remainder( !usePadding || !IsPadded_v< RemoveReference_t<CT> > );
265  constexpr size_t SIMDSIZE = SIMDTrait<ET>::size;
266 
267  ET redux{};
268 
  // Strategy 1.
  // NOTE(review): the reason for excluding Clang from this branch is not visible here - confirm.
269  if( !BLAZE_CLANG_COMPILER && !remainder )
270  {
  // NOTE(review): both accumulators are default-constructed and then only updated with +=, which
  // presumably relies on the SIMD type's default constructor zero-initializing - confirm.
271  SIMDTrait_t<ET> xmm1, xmm2;
272  size_t i( 0UL );
273 
  // Two SIMD loads per iteration, one for each accumulator.
274  for( ; (i+SIMDSIZE) < N; i+=SIMDSIZE*2UL ) {
275  xmm1 += tmp.load(i );
276  xmm2 += tmp.load(i+SIMDSIZE);
277  }
  // One more load if elements are left; this load (and the second load above) may extend past N.
  // NOTE(review): presumably safe because the operand is padded in this branch and the padding
  // elements do not contribute to the sum - confirm.
278  if( i < N ) {
279  xmm1 += tmp.load(i);
280  }
281 
282  redux = sum( xmm1 + xmm2 );
283  }
  // Strategy 2.
284  else if( !remainder || N >= SIMDSIZE )
285  {
  // With remainder handling, ipos is N rounded down to a multiple of SIMDSIZE; otherwise the SIMD
  // loops run up to N.
286  const size_t ipos( ( remainder )?( N & size_t(-SIMDSIZE) ):( N ) );
287  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
288 
289  SIMDTrait_t<ET> xmm1( tmp.load(0UL) );
290 
  // A second accumulator is used when a second SIMD load is needed: a second whole load with
  // remainder handling, or any element beyond the first load without it.
291  if( remainder ? N >= SIMDSIZE*2UL : N > SIMDSIZE )
292  {
293  SIMDTrait_t<ET> xmm2( tmp.load(SIMDSIZE) );
294  size_t i( SIMDSIZE*2UL );
295 
296  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
297  xmm1 += tmp.load(i );
298  xmm2 += tmp.load(i+SIMDSIZE);
299  }
300  for( ; i<ipos; i+=SIMDSIZE ) {
301  xmm1 += tmp.load(i);
302  }
303 
304  xmm1 += xmm2;
305  }
306 
  // Collapse the SIMD accumulator into a single element.
307  redux = sum( xmm1 );
308 
  // Scalar tail [ipos,N); the loop is skipped entirely when 'remainder' is false.
309  for( size_t i=ipos; remainder && i<N; ++i ) {
310  redux += tmp[i];
311  }
312  }
  // Strategy 3: fewer than SIMDSIZE elements, summed one by one.
313  else {
314  redux = tmp[0UL];
315  for( size_t i=1UL; i<N; ++i ) {
316  redux += tmp[i];
317  }
318  }
319 
320  return redux;
321 }
323 //*************************************************************************************************
324 
325 
326 //*************************************************************************************************
337 template< typename VT // Type of the dense vector
338  , bool TF > // Transpose flag
339 inline auto dvecreduce( const DenseVector<VT,TF>& dv, Min /*op*/ )
340  -> EnableIf_t< IsUniform_v<VT>, ElementType_t<VT> >
341 {
342  return (~dv)[0UL];
343 }
345 //*************************************************************************************************
346 
347 
348 //*************************************************************************************************
359 template< typename VT // Type of the dense vector
360  , bool TF > // Transpose flag
361 inline auto dvecreduce( const DenseVector<VT,TF>& dv, Max /*op*/ )
362  -> EnableIf_t< IsUniform_v<VT>, ElementType_t<VT> >
363 {
364  return (~dv)[0UL];
365 }
367 //*************************************************************************************************
368 
369 
370 //*************************************************************************************************
// Reduces the dense vector dv to a single value by means of the binary operation op.
//
// The call is forwarded unchanged to dvecreduce(); which dvecreduce() overload runs is decided
// by overload resolution on the vector type VT and the operation type OP.
//
// \param dv The dense vector to be reduced.
// \param op The reduction operation.
// \return Whatever the selected dvecreduce() overload returns (decltype(auto) preserves its type).
399 template< typename VT // Type of the dense vector
400  , bool TF // Transpose flag
401  , typename OP > // Type of the reduction operation
402 inline decltype(auto) reduce( const DenseVector<VT,TF>& dv, OP op )
403 {
405 
406  return dvecreduce( ~dv, op );
407 }
408 //*************************************************************************************************
409 
410 
411 //*************************************************************************************************
// Reduces the dense vector dv by means of addition.
//
// Implemented as reduce( ~dv, Add() ), i.e. the Add functor is passed as reduction operation.
//
// \param dv The dense vector to be summed up.
// \return The result of the reduce() call.
428 template< typename VT // Type of the dense vector
429  , bool TF > // Transpose flag
430 inline decltype(auto) sum( const DenseVector<VT,TF>& dv )
431 {
433 
434  return reduce( ~dv, Add() );
435 }
436 //*************************************************************************************************
437 
438 
439 //*************************************************************************************************
// Reduces the dense vector dv by means of multiplication.
//
// Implemented as reduce( ~dv, Mult() ), i.e. the Mult functor is passed as reduction operation.
//
// \param dv The dense vector whose elements are multiplied.
// \return The result of the reduce() call.
455 template< typename VT // Type of the dense vector
456  , bool TF > // Transpose flag
457 inline decltype(auto) prod( const DenseVector<VT,TF>& dv )
458 {
460 
461  return reduce( ~dv, Mult() );
462 }
463 //*************************************************************************************************
464 
465 
466 //*************************************************************************************************
// Determines the smallest element of the dense vector dv.
//
// Implemented as reduce( ~dv, Min() ), i.e. the Min functor is passed as reduction operation.
//
// \param dv The dense vector to be searched.
// \return The result of the reduce() call.
483 template< typename VT // Type of the dense vector
484  , bool TF > // Transpose flag
485 inline decltype(auto) min( const DenseVector<VT,TF>& dv )
486 {
488 
489  return reduce( ~dv, Min() );
490 }
491 //*************************************************************************************************
492 
493 
494 //*************************************************************************************************
// Determines the largest element of the dense vector dv.
//
// Implemented as reduce( ~dv, Max() ), i.e. the Max functor is passed as reduction operation.
//
// \param dv The dense vector to be searched.
// \return The result of the reduce() call.
511 template< typename VT // Type of the dense vector
512  , bool TF > // Transpose flag
513 inline decltype(auto) max( const DenseVector<VT,TF>& dv )
514 {
516 
517  return reduce( ~dv, Max() );
518 }
519 //*************************************************************************************************
520 
521 } // namespace blaze
522 
523 #endif
Header file for auxiliary alias declarations.
Header file for the HasLoad type trait.
Header file for basic type definitions.
Header file for the Add functor.
decltype(auto) prod(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of multiplication.
Definition: DMatReduceExpr.h:2219
Header file for the DenseVector base class.
Generic wrapper for the addition operator.
Definition: Add.h:80
decltype(auto) reduce(const DenseMatrix< MT, SO > &dm, OP op)
Performs a custom reduction operation on the given dense matrix.
Definition: DMatReduceExpr.h:2016
Header file for the DisableIf class template.
Header file for the IsUniform type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2146
Header file for the Mult functor.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
Compiler-specific system settings.
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1179
Header file for the EnableIf class template.
Header file for the IsPadded type trait.
Header file for the IsSIMDEnabled type trait.
Header file for run time assertion macros.
Generic wrapper for the max() function.
Definition: Max.h:79
constexpr bool IsPadded_v
Auxiliary variable template for the IsPadded type trait. The IsPadded_v variable template provides a c...
Definition: IsPadded.h:135
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
Header file for the HasMember type traits.
Header file for the RemoveReference type trait.
Generic wrapper for the min() function.
Definition: Min.h:79
Header file for the Min functor.
Generic wrapper for the multiplication operator.
Definition: Mult.h:77
Header file for the Max functor.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.