35#ifndef _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_
36#define _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_
43#include <hpx/include/parallel_for_loop.hpp>
// Applies the operation `op` chunk by chunk to matching subvectors of the dense
// vectors `lhs` and `rhs`, with the chunks dispatched through `for_loop( par, ... )`.
//
// lhs  Target dense vector; it is split into subvectors handed to `op` as the first argument.
// rhs  Source dense vector; split with the same offsets/sizes and handed to `op` as the second argument.
// op   Callable invoked as `op( target, source )` once per chunk.
//
// NOTE(review): this listing has the original line numbers fused to the code and several
// original lines (template header, braces, `#else`/`#endif`, the declaration of `threads`)
// are not visible here; the comments below describe only what is shown.
95void hpxAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
// The names imported below depend on the HPX version.
// NOTE(review): `par` is imported from two different namespaces; presumably the two
// using-declarations sit in opposite branches of this conditional - confirm.
97#if HPX_VERSION_FULL >= 0x010500
99 using hpx::execution::par;
101 using hpx::parallel::for_loop;
102 using hpx::parallel::execution::par;
107 using ET1 = ElementType_t<VT1>;
108 using ET2 = ElementType_t<VT2>;
// SIMD is only considered when both vector types enable it and the two element
// types are SIMD-combinable.
110 constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from the left-hand side element type.
111 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >
::size );
113 const bool lhsAligned( (*lhs).isAligned() );
114 const bool rhsAligned( (*rhs).isAligned() );
// `addon` is 1 when the size is not evenly divisible by `threads`, which makes
// `equalShare` the rounded-up per-thread element count.
117 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
118 const size_t equalShare ( (*lhs).size() / threads + addon );
// Remainder of `equalShare` with respect to SIMDSIZE, computed with a bit mask
// (assumes SIMDSIZE is a power of two - TODO confirm).
119 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero remainder, the chunk size is rounded up to the
// next multiple of SIMDSIZE; otherwise the plain share is used.
120 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One loop iteration per chunk, i in [0, threads).
// NOTE(review): the loop bounds are size_t but the lambda parameter is `int`;
// confirm the implicit narrowing is intended.
122 for_loop( par,
size_t(0), threads, [&](
int i)
// Offset of the first element of the i-th chunk.
124 const size_t index( i*sizePerThread );
// Chunk starts at or past the end of the vector; the statement guarded by this
// condition is not visible in this listing (presumably the chunk is skipped).
126 if( index >= (*lhs).size() )
// Clamp the chunk length so the last chunk does not run past the end of `lhs`.
129 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned subvectors on both sides.
131 if( simdEnabled && lhsAligned && rhsAligned ) {
132 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
133 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
134 op( target, source );
// Case 2: SIMD enabled, only the left-hand side aligned.
136 else if( simdEnabled && lhsAligned ) {
137 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
138 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
139 op( target, source );
// Case 3: SIMD enabled, only the right-hand side aligned.
141 else if( simdEnabled && rhsAligned ) {
142 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
143 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
144 op( target, source );
// Remaining case (its `else` line is not visible here): unaligned subvectors on both sides.
147 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
148 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
149 op( target, source );
// Applies the operation `op` chunk by chunk to matching subvectors of the dense
// vector `lhs` and the sparse vector `rhs`, with the chunks dispatched through
// `for_loop( par, ... )`. Only unaligned subvectors are used in this overload.
//
// lhs  Target dense vector; split into subvectors handed to `op` as the first argument.
// rhs  Source sparse vector; split with the same offsets/sizes and handed to `op` as the second argument.
// op   Callable invoked as `op( target, source )` once per chunk.
//
// NOTE(review): the remaining template parameters, the braces, `#else`/`#endif` and the
// declaration of `threads` are not visible in this listing; only the shown lines are described.
174template<
typename VT1
179void hpxAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// The names imported below depend on the HPX version.
// NOTE(review): `par` is imported from two different namespaces; presumably the two
// using-declarations sit in opposite branches of this conditional - confirm.
181#if HPX_VERSION_FULL >= 0x010500
183 using hpx::execution::par;
185 using hpx::parallel::for_loop;
186 using hpx::parallel::execution::par;
// `addon` is 1 when the size is not evenly divisible by `threads`, so
// `sizePerThread` is the rounded-up per-thread element count.
192 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
193 const size_t sizePerThread( (*lhs).size() / threads + addon );
// One loop iteration per chunk, i in [0, threads).
// NOTE(review): the loop bounds are size_t but the lambda parameter is `int`;
// confirm the implicit narrowing is intended.
195 for_loop( par,
size_t(0), threads, [&](
int i)
// Offset of the first element of the i-th chunk.
197 const size_t index( i*sizePerThread );
// Only chunks that start inside the vector are tested for here; the extent of the
// guarded scope is not visible in this listing.
199 if( index < (*lhs).size() )
// Clamp the chunk length so the last chunk does not run past the end of `lhs`.
202 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
203 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
204 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
205 op( target, source );
// smpAssign overload selected when VT1 is a dense vector and at least one of the two
// vector types is not SMP-assignable (see the EnableIf_t condition in the return type).
// The visible body forwards directly to `assign( *lhs, *rhs )`.
// NOTE(review): the rest of the template header and the braces are not visible in this listing.
238template<
typename VT1
242inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
243 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
249 assign( *lhs, *rhs );
// smpAssign overload selected when VT1 is a dense vector and both vector types are
// SMP-assignable (see the EnableIf_t condition in the return type).
// Two calls are visible: a direct `assign( *lhs, *rhs )` and a call to `hpxAssign`
// that passes a generic lambda performing `assign( a, b )` on each pair of chunks.
// NOTE(review): the condition choosing between the two calls is not visible in this
// listing (presumably a serial-section / can-parallelize check) - confirm against the original.
273template<
typename VT1
277inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
278 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Direct, non-chunked assignment.
288 assign( *lhs, *rhs );
// Chunked assignment: hpxAssign invokes the lambda once per subvector pair.
291 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign overload selected when VT1 is a dense vector and at least one of the two
// vector types is not SMP-assignable (see the EnableIf_t condition in the return type).
// The visible body forwards directly to `addAssign( *lhs, *rhs )`.
// NOTE(review): the rest of the template header and the braces are not visible in this listing.
324template<
typename VT1
328inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
329 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
335 addAssign( *lhs, *rhs );
// smpAddAssign overload selected when VT1 is a dense vector and both vector types are
// SMP-assignable (see the EnableIf_t condition in the return type).
// Two calls are visible: a direct `addAssign( *lhs, *rhs )` and a call to `hpxAssign`
// that passes a generic lambda performing `addAssign( a, b )` on each pair of chunks.
// NOTE(review): the condition choosing between the two calls is not visible in this
// listing (presumably a serial-section / can-parallelize check) - confirm against the original.
359template<
typename VT1
363inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
364 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Direct, non-chunked addition assignment.
374 addAssign( *lhs, *rhs );
// Chunked addition assignment: hpxAssign invokes the lambda once per subvector pair.
377 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign overload selected when VT1 is a dense vector and at least one of the two
// vector types is not SMP-assignable (see the EnableIf_t condition in the return type).
// The visible body forwards directly to `subAssign( *lhs, *rhs )`.
// NOTE(review): the rest of the template header and the braces are not visible in this listing.
410template<
typename VT1
414inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
415 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
421 subAssign( *lhs, *rhs );
// smpSubAssign overload selected when VT1 is a dense vector and both vector types are
// SMP-assignable (see the EnableIf_t condition in the return type).
// Two calls are visible: a direct `subAssign( *lhs, *rhs )` and a call to `hpxAssign`
// that passes a generic lambda performing `subAssign( a, b )` on each pair of chunks.
// NOTE(review): the condition choosing between the two calls is not visible in this
// listing (presumably a serial-section / can-parallelize check) - confirm against the original.
445template<
typename VT1
449inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
450 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Direct, non-chunked subtraction assignment.
460 subAssign( *lhs, *rhs );
// Chunked subtraction assignment: hpxAssign invokes the lambda once per subvector pair.
463 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpMultAssign overload selected when VT1 is a dense vector and at least one of the two
// vector types is not SMP-assignable (see the EnableIf_t condition in the return type).
// The visible body forwards directly to `multAssign( *lhs, *rhs )`.
// NOTE(review): the rest of the template header and the braces are not visible in this listing.
496template<
typename VT1
500inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
501 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
507 multAssign( *lhs, *rhs );
// smpMultAssign overload selected when VT1 is a dense vector and both vector types are
// SMP-assignable (see the EnableIf_t condition in the return type).
// Two calls are visible: a direct `multAssign( *lhs, *rhs )` and a call to `hpxAssign`
// that passes a generic lambda performing `multAssign( a, b )` on each pair of chunks.
// NOTE(review): the condition choosing between the two calls is not visible in this
// listing (presumably a serial-section / can-parallelize check) - confirm against the original.
531template<
typename VT1
535inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
536 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Direct, non-chunked multiplication assignment.
546 multAssign( *lhs, *rhs );
// Chunked multiplication assignment: hpxAssign invokes the lambda once per subvector pair.
549 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
// smpDivAssign overload selected when VT1 is a dense vector and at least one of the two
// vector types is not SMP-assignable (see the EnableIf_t condition in the return type).
// The visible body forwards directly to `divAssign( *lhs, *rhs )`.
// NOTE(review): the rest of the template header and the braces are not visible in this listing.
582template<
typename VT1
586inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
587 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
593 divAssign( *lhs, *rhs );
// smpDivAssign overload selected when VT1 is a dense vector and both vector types are
// SMP-assignable (see the EnableIf_t condition in the return type).
// Two calls are visible: a direct `divAssign( *lhs, *rhs )` and a call to `hpxAssign`
// that passes a generic lambda performing `divAssign( a, b )` on each pair of chunks.
// NOTE(review): the condition choosing between the two calls is not visible in this
// listing (presumably a serial-section / can-parallelize check) - confirm against the original.
617template<
typename VT1
621inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
622 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Direct, non-chunked division assignment.
632 divAssign( *lhs, *rhs );
// Chunked division assignment: hpxAssign invokes the lambda once per subvector pair.
635 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseVector type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Deactivation of problematic macros.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the DenseVector base class.
Header file for the SparseVector base class.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:221
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
auto smpAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP assignment of a vector to a dense vector.
Definition: DenseVector.h:105
auto smpAddAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP addition assignment of a vector to a dense vector.
Definition: DenseVector.h:134
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
auto smpSubAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP subtraction assignment of a vector to a dense vector.
Definition: DenseVector.h:163
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization.
Definition: SMP.h:96
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
Header file for SMP utility functions.
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Subvector view.