35#ifndef _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_DVECREDUCEEXPR_H_
// Compile-time helper exposing a boolean `value`. The dvecreduce() overloads in
// this file test DVecReduceExprHelper<VT,OP>::value in their DisableIf_t /
// EnableIf_t return types to choose between implementations.
// NOTE(review): the template header, the braces and the first part of the
// `value` expression (source line 91) are not present in this listing.
79struct DVecReduceExprHelper
// CT: CompositeType_t of the vector type VT, passed through RemoveReference_t.
83 using CT = RemoveReference_t< CompositeType_t<VT> >;
// ET: ElementType_t of CT, passed through RemoveCV_t.
86 using ET = RemoveCV_t< ElementType_t<CT> >;
90 static constexpr bool value =
// Trailing operand of the expression: If_t chooses between
// GetSIMDEnabled<OP,ET,ET> and HasLoad<OP> depending on HasSIMDEnabled_v<OP>
// (presumably the former when the flag is true) and reads the chosen ::value.
92 If_t< HasSIMDEnabled_v<OP>, GetSIMDEnabled<OP,ET,ET>, HasLoad<OP> >::value );
// Reduces the dense vector `dv` to a single value with the binary operation `op`.
// This overload is constrained with DisableIf_t on DVecReduceExprHelper<VT,OP>::value
// and returns RemoveCV_t< ReduceTrait_t<VT,OP> >.
// NOTE(review): the template header, the braces and the lines that introduce
// `tmp` and the index `i` are not present in this listing.
123inline auto dvecreduce(
const DenseVector<VT,TF>& dv, OP op )
124 -> DisableIf_t< DVecReduceExprHelper<VT,OP>::value, RemoveCV_t< ReduceTrait_t<VT,OP> > >
126 using CT = CompositeType_t<VT>;
127 using RT = RemoveCV_t< ReduceTrait_t<VT,OP> >;
// N: number of elements in the vector.
129 const size_t N( (*dv).size() );
// Empty vector: return a value-initialized result.
131 if( N == 0UL )
return RT{};
// Single element: return it without applying `op`.
132 if( N == 1UL )
return (*dv)[0UL];
// Two independent accumulators, seeded with elements 0 and 1.
138 RT redux1( tmp[0UL] );
139 RT redux2( tmp[1UL] );
// Four elements per iteration: two folded into redux1, two into redux2.
142 for( ; (i+4UL) <= N; i+=4UL ) {
143 redux1 = op( op( redux1, tmp[i ] ), tmp[i+1UL] );
144 redux2 = op( op( redux2, tmp[i+2UL] ), tmp[i+3UL] );
// Two elements per iteration: one into each accumulator.
146 for( ; (i+2UL) <= N; i+=2UL ) {
147 redux1 = op( redux1, tmp[i ] );
148 redux2 = op( redux2, tmp[i+1UL] );
// Folds element i into redux1; the enclosing loop header is not visible here
// (presumably handles the last leftover element -- confirm in the full header).
151 redux1 = op( redux1, tmp[i] );
// Combine the two partial results into the final value.
154 return op( redux1, redux2 );
// Reduces the dense vector `dv` with `op` using SIMD loads (`tmp.load`).
// This overload is constrained with EnableIf_t on DVecReduceExprHelper<VT,OP>::value
// and returns RemoveCV_t< ElementType_t<VT> >.
// NOTE(review): the template header, the braces, the enclosing if/else structure
// and the declarations of `tmp`, `ipos` and `redux` are not present in this listing.
176inline auto dvecreduce(
const DenseVector<VT,TF>& dv, OP op )
177 -> EnableIf_t< DVecReduceExprHelper<VT,OP>::value, RemoveCV_t< ElementType_t<VT> > >
179 using CT = CompositeType_t<VT>;
180 using ET = RemoveCV_t< ElementType_t<VT> >;
// N: number of elements in the vector.
182 const size_t N( (*dv).size() );
// Empty vector: return a value-initialized element.
184 if( N == 0UL )
return ET{};
// First SIMD accumulator, initialized from the load at offset 0.
199 SIMDTrait_t<ET> xmm1( tmp.load(0UL) );
// With at least two SIMD widths of data a second accumulator is used.
201 if( N >= SIMDSIZE*2UL )
// Second accumulator, initialized from the load at offset SIMDSIZE.
203 SIMDTrait_t<ET> xmm2( tmp.load(SIMDSIZE) );
// Continue after the two loads already consumed.
204 size_t i( SIMDSIZE*2UL );
// Two SIMD loads per iteration, one folded into each accumulator.
206 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
207 xmm1 = op( xmm1, tmp.load(i ) );
208 xmm2 = op( xmm2, tmp.load(i+SIMDSIZE) );
// One SIMD load per iteration for what is left below ipos.
210 for( ; i<ipos; i+=SIMDSIZE ) {
211 xmm1 = op( xmm1, tmp.load(i) );
// Merge the second accumulator into the first.
214 xmm1 = op( xmm1, xmm2 );
// Stores the result of reduce( xmm1, op ) in `redux` (presumably collapsing the
// SIMD accumulator to one scalar -- confirm against the SIMD reduce() overload).
217 redux =
reduce( xmm1, op );
// Element-wise tail: indices ipos..N-1 are folded into redux one at a time.
219 for(
size_t i=ipos; i<N; ++i ) {
220 redux = op( redux, tmp[i] );
// Element-wise loop starting at index 1. NOTE(review): presumably the branch
// taken when no SIMD load is done, with redux seeded from element 0 on a line
// that is not visible here.
225 for(
size_t i=1UL; i<N; ++i ) {
226 redux = op( redux, tmp[i] );
// Overload of dvecreduce() for the Add functor (second parameter is an unnamed Add).
// Constrained with EnableIf_t on DVecReduceExprHelper<VT,Add>::value; returns
// RemoveCV_t< ElementType_t<VT> >. The number of SIMD accumulators (four, two
// or one) is chosen from the size of the range below ipos.
// NOTE(review): the template header, the braces, several loop bodies and the
// declarations of `tmp`, `i` and `redux` are not present in this listing.
250inline auto dvecreduce(
const DenseVector<VT,TF>& dv, Add )
251 -> EnableIf_t< DVecReduceExprHelper<VT,Add>::value, RemoveCV_t< ElementType_t<VT> > >
253 using CT = CompositeType_t<VT>;
254 using ET = RemoveCV_t< ElementType_t<VT> >;
// N: number of elements in the vector.
256 const size_t N( (*dv).size() );
// Empty vector: return a value-initialized element.
258 if( N == 0UL )
return ET{};
// remainder: true when the (reference-stripped) composite type is not padded.
264 constexpr bool remainder( !
IsPadded_v< RemoveReference_t<CT> > );
// ipos: end of the SIMD-processed range. With a remainder it is N rounded down
// to a multiple of SIMDSIZE, otherwise it is N itself.
267 const size_t ipos( remainder ?
prevMultiple( N, SIMDSIZE ) : N );
// More than three SIMD widths below ipos: use four accumulators.
273 if( SIMDSIZE*3UL < ipos )
275 SIMDTrait_t<ET> xmm1{}, xmm2{}, xmm3{}, xmm4{};
// Four SIMD loads per iteration, one added to each accumulator.
277 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
278 xmm1 += tmp.load(i );
279 xmm2 += tmp.load(i+SIMDSIZE );
280 xmm3 += tmp.load(i+SIMDSIZE*2UL);
281 xmm4 += tmp.load(i+SIMDSIZE*3UL);
// Two SIMD loads per iteration.
283 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
284 xmm1 += tmp.load(i );
285 xmm2 += tmp.load(i+SIMDSIZE);
// One SIMD width per iteration (loop body not visible in this listing).
287 for( ; i<ipos; i+=SIMDSIZE ) {
// Add the four accumulators and pass the result to sum(), storing it in redux.
291 redux =
sum( xmm1 + xmm2 + xmm3 + xmm4 );
// More than one SIMD width below ipos: use two accumulators.
293 else if( SIMDSIZE < ipos )
295 SIMDTrait_t<ET> xmm1{}, xmm2{};
// Two SIMD loads per iteration.
297 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
298 xmm1 += tmp.load(i );
299 xmm2 += tmp.load(i+SIMDSIZE);
// One SIMD width per iteration (loop body not visible in this listing).
301 for( ; i<ipos; i+=SIMDSIZE ) {
// Add the two accumulators and pass the result to sum(), storing it in redux.
305 redux =
sum( xmm1 + xmm2 );
// Single-accumulator variant (the `else` introducing it is not visible here).
309 SIMDTrait_t<ET> xmm1{};
// One SIMD width per iteration (loop body not visible in this listing).
311 for( ; i<ipos; i+=SIMDSIZE ) {
// Element-wise tail up to N; it only runs when `remainder` is true
// (loop body not visible in this listing).
318 for( ; remainder && i<N; ++i ) {
// Overload of dvecreduce() for the Min functor, constrained with EnableIf_t on
// IsUniform_v<VT>; returns RemoveCV_t< ElementType_t<VT> >.
// NOTE(review): only the signature is present in this listing; the body is not visible.
341inline auto dvecreduce(
const DenseVector<VT,TF>& dv, Min )
342 -> EnableIf_t< IsUniform_v<VT>, RemoveCV_t< ElementType_t<VT> > >
// Overload of dvecreduce() for the Max functor, constrained with EnableIf_t on
// IsUniform_v<VT>; returns RemoveCV_t< ElementType_t<VT> >.
// NOTE(review): only the signature is present in this listing; the body is not visible.
363inline auto dvecreduce(
const DenseVector<VT,TF>& dv, Max )
364 -> EnableIf_t< IsUniform_v<VT>, RemoveCV_t< ElementType_t<VT> > >
// Forwards the dereferenced vector and the moved operation to dvecreduce().
// NOTE(review): presumably the body of reduce(), which the cross-reference
// text in this file places at DVecReduceExpr.h:404; the signature is not visible.
408 return dvecreduce( *dv, std::move(op) );
// Fragment that presumably belongs to argmin() (the cross-reference text in this
// file places argmin at DVecReduceExpr.h:542 and describes it as returning the
// index of the first smallest element).
// NOTE(review): the signature, the braces and the declarations of `a`, `min`
// and `cur` are not present in this listing.
// Vectors with fewer than two elements take a separate path; the statement
// executed under this condition is not visible.
544 if( (*dv).size() < 2UL )
549 const size_t size( a.size() );
// Scan from index 1 up to (excluding) size.
553 for(
size_t i=1UL; i<
size; ++i ) {
// Move the current candidate into `min`; the comparison guarding this
// assignment is not visible in this listing.
557 min = std::move( cur );
// Fragment that presumably belongs to argmax() (the cross-reference text in this
// file places argmax at DVecReduceExpr.h:584 and describes it as returning the
// index of the first largest element).
// NOTE(review): the signature, the braces and the declarations of `a`, `max`
// and `cur` are not present in this listing.
// Vectors with fewer than two elements take a separate path; the statement
// executed under this condition is not visible.
586 if( (*dv).size() < 2UL )
591 const size_t size( a.size() );
// Scan from index 1 up to (excluding) size.
595 for(
size_t i=1UL; i<
size; ++i ) {
// Move the current candidate into `max`; the comparison guarding this
// assignment is not visible in this listing.
599 max = std::move( cur );
Header file for auxiliary alias declarations.
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.
Definition: Aliases.h:110
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasLoad type trait.
Header file for the HasMember type traits.
Header file for the IsPadded type trait.
Header file for the IsSIMDEnabled type trait.
Header file for the prevMultiple shim.
Header file for the RemoveCV type trait.
Header file for the RemoveReference type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.
Definition: DenseVector.h:77
Header file for the DenseVector base class.
Header file for the Add functor.
Header file for the Mult functor.
decltype(auto) prod(const DenseVector< VT, TF > &dv)
Reduces the given dense vector by means of multiplication.
Definition: DVecReduceExpr.h:459
decltype(auto) sum(const DenseVector< VT, TF > &dv)
Reduces the given dense vector by means of addition.
Definition: DVecReduceExpr.h:432
size_t argmax(const DenseVector< VT, TF > &dv)
Returns the index of the first largest element of the dense vector.
Definition: DVecReduceExpr.h:584
decltype(auto) min(const DenseVector< VT, TF > &dv)
Returns the smallest element of the dense vector.
Definition: DVecReduceExpr.h:487
size_t argmin(const DenseVector< VT, TF > &dv)
Returns the index of the first smallest element of the dense vector.
Definition: DVecReduceExpr.h:542
decltype(auto) max(const DenseVector< VT, TF > &dv)
Returns the largest element of the dense vector.
Definition: DVecReduceExpr.h:515
decltype(auto) reduce(const DenseVector< VT, TF > &dv, OP op)
Performs a custom reduction operation on the given dense vector.
Definition: DVecReduceExpr.h:404
constexpr bool IsPadded_v
Auxiliary variable template for the IsPadded type trait.
Definition: IsPadded.h:134
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the Max functor.
Header file for the Min functor.
Generic wrapper for the addition operator.
Definition: Add.h:85
Generic wrapper for the max() function.
Definition: Max.h:82
Generic wrapper for the min() function.
Definition: Min.h:82
Generic wrapper for the multiplication operator.
Definition: Mult.h:82
Header file for basic type definitions.