Blaze 3.9
DVecReduceExpr.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <blaze/math/Aliases.h>
50#include <blaze/math/SIMD.h>
55#include <blaze/util/Assert.h>
56#include <blaze/util/EnableIf.h>
58#include <blaze/util/Types.h>
62
63
64namespace blaze {
65
66//=================================================================================================
67//
68// CLASS DEFINITION
69//
70//=================================================================================================
71
72//*************************************************************************************************
77template< typename VT // Type of the dense vector
78 , typename OP > // Type of the reduction operation
79struct DVecReduceExprHelper
80{
81 //**Type definitions****************************************************************************
83 using CT = RemoveReference_t< CompositeType_t<VT> >;
84
86 using ET = RemoveCV_t< ElementType_t<CT> >;
87 //**********************************************************************************************
88
89 //**********************************************************************************************
90 static constexpr bool value =
91 ( CT::simdEnabled &&
92 If_t< HasSIMDEnabled_v<OP>, GetSIMDEnabled<OP,ET,ET>, HasLoad<OP> >::value );
93 //**********************************************************************************************
94};
96//*************************************************************************************************
97
98
99
100
101//=================================================================================================
102//
103// GLOBAL FUNCTIONS
104//
105//=================================================================================================
106
107//*************************************************************************************************
120template< typename VT // Type of the dense vector
121 , bool TF // Transpose flag
122 , typename OP > // Type of the reduction operation
123inline auto dvecreduce( const DenseVector<VT,TF>& dv, OP op )
124 -> DisableIf_t< DVecReduceExprHelper<VT,OP>::value, RemoveCV_t< ReduceTrait_t<VT,OP> > >
125{
126 using CT = CompositeType_t<VT>;
127 using RT = RemoveCV_t< ReduceTrait_t<VT,OP> >;
128
129 const size_t N( (*dv).size() );
130
131 if( N == 0UL ) return RT{};
132 if( N == 1UL ) return (*dv)[0UL];
133
134 CT tmp( *dv );
135
136 BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
137
138 RT redux1( tmp[0UL] );
139 RT redux2( tmp[1UL] );
140 size_t i( 2UL );
141
142 for( ; (i+4UL) <= N; i+=4UL ) {
143 redux1 = op( op( redux1, tmp[i ] ), tmp[i+1UL] );
144 redux2 = op( op( redux2, tmp[i+2UL] ), tmp[i+3UL] );
145 }
146 for( ; (i+2UL) <= N; i+=2UL ) {
147 redux1 = op( redux1, tmp[i ] );
148 redux2 = op( redux2, tmp[i+1UL] );
149 }
150 for( ; i<N; ++i ) {
151 redux1 = op( redux1, tmp[i] );
152 }
153
154 return op( redux1, redux2 );
155}
157//*************************************************************************************************
158
159
160//*************************************************************************************************
173template< typename VT // Type of the dense vector
174 , bool TF // Transpose flag
175 , typename OP > // Type of the reduction operation
176inline auto dvecreduce( const DenseVector<VT,TF>& dv, OP op )
177 -> EnableIf_t< DVecReduceExprHelper<VT,OP>::value, RemoveCV_t< ElementType_t<VT> > >
178{
179 using CT = CompositeType_t<VT>;
180 using ET = RemoveCV_t< ElementType_t<VT> >;
181
182 const size_t N( (*dv).size() );
183
184 if( N == 0UL ) return ET{};
185
186 CT tmp( *dv );
187
188 BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
189
190 constexpr size_t SIMDSIZE = SIMDTrait<ET>::size;
191
192 ET redux{};
193
194 if( N >= SIMDSIZE )
195 {
196 const size_t ipos( prevMultiple( N, SIMDSIZE ) );
197 BLAZE_INTERNAL_ASSERT( ipos <= N, "Invalid end calculation" );
198
199 SIMDTrait_t<ET> xmm1( tmp.load(0UL) );
200
201 if( N >= SIMDSIZE*2UL )
202 {
203 SIMDTrait_t<ET> xmm2( tmp.load(SIMDSIZE) );
204 size_t i( SIMDSIZE*2UL );
205
206 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
207 xmm1 = op( xmm1, tmp.load(i ) );
208 xmm2 = op( xmm2, tmp.load(i+SIMDSIZE) );
209 }
210 for( ; i<ipos; i+=SIMDSIZE ) {
211 xmm1 = op( xmm1, tmp.load(i) );
212 }
213
214 xmm1 = op( xmm1, xmm2 );
215 }
216
217 redux = reduce( xmm1, op );
218
219 for( size_t i=ipos; i<N; ++i ) {
220 redux = op( redux, tmp[i] );
221 }
222 }
223 else {
224 redux = tmp[0UL];
225 for( size_t i=1UL; i<N; ++i ) {
226 redux = op( redux, tmp[i] );
227 }
228 }
229
230 return redux;
231}
233//*************************************************************************************************
234
235
236//*************************************************************************************************
248template< typename VT // Type of the dense vector
249 , bool TF > // Transpose flag
250inline auto dvecreduce( const DenseVector<VT,TF>& dv, Add /*op*/ )
251 -> EnableIf_t< DVecReduceExprHelper<VT,Add>::value, RemoveCV_t< ElementType_t<VT> > >
252{
253 using CT = CompositeType_t<VT>;
254 using ET = RemoveCV_t< ElementType_t<VT> >;
255
256 const size_t N( (*dv).size() );
257
258 if( N == 0UL ) return ET{};
259
260 CT tmp( *dv );
261
262 BLAZE_INTERNAL_ASSERT( tmp.size() == N, "Invalid vector size" );
263
264 constexpr bool remainder( !IsPadded_v< RemoveReference_t<CT> > );
265 constexpr size_t SIMDSIZE = SIMDTrait<ET>::size;
266
267 const size_t ipos( remainder ? prevMultiple( N, SIMDSIZE ) : N );
268 BLAZE_INTERNAL_ASSERT( ipos <= N, "Invalid end calculation" );
269
270 size_t i( 0UL );
271 ET redux{};
272
273 if( SIMDSIZE*3UL < ipos )
274 {
275 SIMDTrait_t<ET> xmm1{}, xmm2{}, xmm3{}, xmm4{};
276
277 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
278 xmm1 += tmp.load(i );
279 xmm2 += tmp.load(i+SIMDSIZE );
280 xmm3 += tmp.load(i+SIMDSIZE*2UL);
281 xmm4 += tmp.load(i+SIMDSIZE*3UL);
282 }
283 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
284 xmm1 += tmp.load(i );
285 xmm2 += tmp.load(i+SIMDSIZE);
286 }
287 for( ; i<ipos; i+=SIMDSIZE ) {
288 xmm1 += tmp.load(i);
289 }
290
291 redux = sum( xmm1 + xmm2 + xmm3 + xmm4 );
292 }
293 else if( SIMDSIZE < ipos )
294 {
295 SIMDTrait_t<ET> xmm1{}, xmm2{};
296
297 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
298 xmm1 += tmp.load(i );
299 xmm2 += tmp.load(i+SIMDSIZE);
300 }
301 for( ; i<ipos; i+=SIMDSIZE ) {
302 xmm1 += tmp.load(i);
303 }
304
305 redux = sum( xmm1 + xmm2 );
306 }
307 else
308 {
309 SIMDTrait_t<ET> xmm1{};
310
311 for( ; i<ipos; i+=SIMDSIZE ) {
312 xmm1 += tmp.load(i);
313 }
314
315 redux = sum( xmm1 );
316 }
317
318 for( ; remainder && i<N; ++i ) {
319 redux += tmp[i];
320 }
321
322 return redux;
323}
325//*************************************************************************************************
326
327
328//*************************************************************************************************
339template< typename VT // Type of the dense vector
340 , bool TF > // Transpose flag
341inline auto dvecreduce( const DenseVector<VT,TF>& dv, Min /*op*/ )
342 -> EnableIf_t< IsUniform_v<VT>, RemoveCV_t< ElementType_t<VT> > >
343{
344 return (*dv)[0UL];
345}
347//*************************************************************************************************
348
349
350//*************************************************************************************************
361template< typename VT // Type of the dense vector
362 , bool TF > // Transpose flag
363inline auto dvecreduce( const DenseVector<VT,TF>& dv, Max /*op*/ )
364 -> EnableIf_t< IsUniform_v<VT>, RemoveCV_t< ElementType_t<VT> > >
365{
366 return (*dv)[0UL];
367}
369//*************************************************************************************************
370
371
372//*************************************************************************************************
401template< typename VT // Type of the dense vector
402 , bool TF // Transpose flag
403 , typename OP > // Type of the reduction operation
404inline decltype(auto) reduce( const DenseVector<VT,TF>& dv, OP op )
405{
407
408 return dvecreduce( *dv, std::move(op) );
409}
410//*************************************************************************************************
411
412
413//*************************************************************************************************
430template< typename VT // Type of the dense vector
431 , bool TF > // Transpose flag
432inline decltype(auto) sum( const DenseVector<VT,TF>& dv )
433{
435
436 return reduce( *dv, Add() );
437}
438//*************************************************************************************************
439
440
441//*************************************************************************************************
457template< typename VT // Type of the dense vector
458 , bool TF > // Transpose flag
459inline decltype(auto) prod( const DenseVector<VT,TF>& dv )
460{
462
463 return reduce( *dv, Mult() );
464}
465//*************************************************************************************************
466
467
468//*************************************************************************************************
485template< typename VT // Type of the dense vector
486 , bool TF > // Transpose flag
487inline decltype(auto) min( const DenseVector<VT,TF>& dv )
488{
490
491 return reduce( *dv, Min() );
492}
493//*************************************************************************************************
494
495
496//*************************************************************************************************
513template< typename VT // Type of the dense vector
514 , bool TF > // Transpose flag
515inline decltype(auto) max( const DenseVector<VT,TF>& dv )
516{
518
519 return reduce( *dv, Max() );
520}
521//*************************************************************************************************
522
523
524//*************************************************************************************************
540template< typename VT // Type of the dense vector
541 , bool TF > // Transpose flag
542inline size_t argmin( const DenseVector<VT,TF>& dv )
543{
544 if( (*dv).size() < 2UL )
545 return 0UL;
546
547 CompositeType_t<VT> a( *dv ); // Evaluation of the dense vector operand
548
549 const size_t size( a.size() );
550 size_t index( 0UL );
551 auto min( a[0UL] );
552
553 for( size_t i=1UL; i<size; ++i ) {
554 auto cur( a[i] );
555 if( cur < min ) {
556 index = i;
557 min = std::move( cur );
558 }
559 }
560
561 return index;
562}
563//*************************************************************************************************
564
565
566//*************************************************************************************************
582template< typename VT // Type of the dense vector
583 , bool TF > // Transpose flag
584inline size_t argmax( const DenseVector<VT,TF>& dv )
585{
586 if( (*dv).size() < 2UL )
587 return 0UL;
588
589 CompositeType_t<VT> a( *dv ); // Evaluation of the dense vector operand
590
591 const size_t size( a.size() );
592 size_t index( 0UL );
593 auto max( a[0UL] );
594
595 for( size_t i=1UL; i<size; ++i ) {
596 auto cur( a[i] );
597 if( max < cur ) {
598 index = i;
599 max = std::move( cur );
600 }
601 }
602
603 return index;
604}
605//*************************************************************************************************
606
607} // namespace blaze
608
609#endif
Header file for auxiliary alias declarations.
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.
Definition: Aliases.h:110
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasLoad type trait.
Header file for the HasMember type traits.
Header file for the IsPadded type trait.
Header file for the IsSIMDEnabled type trait.
Header file for the IsUniform type trait.
Header file for the prevMultiple shim.
Header file for the RemoveCV type trait.
Header file for the RemoveReference type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.
Definition: DenseVector.h:77
Header file for the DenseVector base class.
Header file for the Add functor.
Header file for the Mult functor.
decltype(auto) prod(const DenseVector< VT, TF > &dv)
Reduces the given dense vector by means of multiplication.
Definition: DVecReduceExpr.h:459
decltype(auto) sum(const DenseVector< VT, TF > &dv)
Reduces the given dense vector by means of addition.
Definition: DVecReduceExpr.h:432
size_t argmax(const DenseVector< VT, TF > &dv)
Returns the index of the first largest element of the dense vector.
Definition: DVecReduceExpr.h:584
decltype(auto) min(const DenseVector< VT, TF > &dv)
Returns the smallest element of the dense vector.
Definition: DVecReduceExpr.h:487
size_t argmin(const DenseVector< VT, TF > &dv)
Returns the index of the first smallest element of the dense vector.
Definition: DVecReduceExpr.h:542
decltype(auto) max(const DenseVector< VT, TF > &dv)
Returns the largest element of the dense vector.
Definition: DVecReduceExpr.h:515
decltype(auto) reduce(const DenseVector< VT, TF > &dv, OP op)
Performs a custom reduction operation on the given dense vector.
Definition: DVecReduceExpr.h:404
constexpr bool IsPadded_v
Auxiliary variable template for the IsPadded type trait.
Definition: IsPadded.h:134
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the Max functor.
Header file for the Min functor.
Generic wrapper for the addition operator.
Definition: Add.h:85
Generic wrapper for the max() function.
Definition: Max.h:82
Generic wrapper for the min() function.
Definition: Min.h:82
Generic wrapper for the multiplication operator.
Definition: Mult.h:82
Header file for basic type definitions.