35 #ifndef _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_ 43 #include <hpx/include/parallel_for_loop.hpp> 89 template<
// hpxAssign (dense right-hand side): splits `lhs` and `rhs` into matching
// subvector chunks and applies the functor `op( target, source )` to each
// chunk from inside an HPX parallel for_loop.
// NOTE(review): this listing is an excerpt; the remaining template parameters,
// the braces, and the definition of `threads` are not visible here.
typename VT1
94 void hpxAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
96 using hpx::parallel::for_loop;
97 using hpx::parallel::execution::par;
101 using ET1 = ElementType_t<VT1>;
102 using ET2 = ElementType_t<VT2>;
// True only if both vector types report simdEnabled and their element types
// are SIMD-combinable.
104 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMDTrait<...>::size for the lhs element type (presumably the number of
// elements per SIMD vector -- confirm against SIMDTrait).
105 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::
size );
// Alignment of both operands is queried once, before the loop.
107 const bool lhsAligned( (~lhs).isAligned() );
108 const bool rhsAligned( (~rhs).isAligned() );
// addon is 1 when the size is not evenly divisible by `threads`, so equalShare
// is the ceiling of size/threads.
111 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
112 const size_t equalShare ( (~lhs).
size() / threads + addon );
// Low bits of equalShare; equals equalShare % SIMDSIZE only if SIMDSIZE is a
// power of two (assumed -- TODO confirm).
113 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero rest, the chunk size is rounded up to the
// next multiple of SIMDSIZE; otherwise equalShare is used unchanged.
114 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One iteration per chunk over [0, threads); everything is captured by reference.
// NOTE(review): the lambda parameter is `int` while the loop bounds are size_t.
116 for_loop( par,
size_t(0), threads, [&](
int i)
// Offset of this iteration's chunk within the vectors.
118 const size_t index( i*sizePerThread );
// Chunks that would start at or past the end of lhs are detected here.
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
120 if( index >= (~lhs).
size() )
// The last chunk is clipped to the remaining number of elements.
123 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views on both sides.
125 if( simdEnabled && lhsAligned && rhsAligned ) {
126 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
127 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
128 op( target, source );
// Case 2: only lhs aligned -> aligned target, unaligned source.
130 else if( simdEnabled && lhsAligned ) {
131 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
132 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
133 op( target, source );
// Case 3: only rhs aligned -> unaligned target, aligned source.
135 else if( simdEnabled && rhsAligned ) {
136 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
137 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
138 op( target, source );
// Remaining case (its branch header is not visible in this excerpt):
// unaligned views on both sides.
141 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
142 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
143 op( target, source );
// hpxAssign (sparse right-hand side): splits `lhs` and `rhs` into matching
// subvector chunks and applies `op( target, source )` to each chunk from inside
// an HPX parallel for_loop. Only unaligned subvector views are used here.
// NOTE(review): this listing is an excerpt; the remaining template parameters,
// the braces, and the definition of `threads` are not visible here.
168 template<
typename VT1
173 void hpxAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
175 using hpx::parallel::for_loop;
176 using hpx::parallel::execution::par;
// addon is 1 when the size is not evenly divisible by `threads`, so
// sizePerThread is the ceiling of size/threads.
181 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
182 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// One iteration per chunk over [0, threads); everything is captured by reference.
// NOTE(review): the lambda parameter is `int` while the loop bounds are size_t.
184 for_loop( par,
size_t(0), threads, [&](
int i)
// Offset of this iteration's chunk within the vectors.
186 const size_t index( i*sizePerThread );
// Only chunks that start inside lhs are checked for here; the extent of the
// guarded scope is not visible in this excerpt.
188 if( index < (~lhs).
size() )
// The last chunk is clipped to the remaining number of elements.
191 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
192 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
193 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
194 op( target, source );
// smpAssign overload enabled when VT1 is a dense vector and at least one of
// VT1/VT2 is not SMP-assignable; it forwards directly to assign( ~lhs, ~rhs ).
// NOTE(review): excerpt only -- remaining template parameters and braces are not visible.
227 template<
typename VT1
231 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
232 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
238 assign( ~lhs, ~rhs );
// smpAssign overload enabled when VT1 is a dense vector and both VT1 and VT2
// are SMP-assignable. The body contains a direct assign() call and an
// hpxAssign() call that passes an assign() lambda as the per-chunk operation.
// NOTE(review): the condition selecting between the two calls is not visible
// in this excerpt.
262 template<
typename VT1
266 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
267 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
277 assign( ~lhs, ~rhs );
// Chunked path: hpxAssign invokes the lambda on each (target, source) pair.
280 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign overload enabled when VT1 is a dense vector and at least one of
// VT1/VT2 is not SMP-assignable; it forwards directly to addAssign( ~lhs, ~rhs ).
// NOTE(review): excerpt only -- remaining template parameters and braces are not visible.
313 template<
typename VT1
317 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
318 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
324 addAssign( ~lhs, ~rhs );
// smpAddAssign overload enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable. The body contains a direct addAssign() call and an
// hpxAssign() call that passes an addAssign() lambda as the per-chunk operation.
// NOTE(review): the condition selecting between the two calls is not visible
// in this excerpt.
348 template<
typename VT1
352 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
353 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
363 addAssign( ~lhs, ~rhs );
// Chunked path: hpxAssign invokes the lambda on each (target, source) pair.
366 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign overload enabled when VT1 is a dense vector and at least one of
// VT1/VT2 is not SMP-assignable; it forwards directly to subAssign( ~lhs, ~rhs ).
// NOTE(review): excerpt only -- remaining template parameters and braces are not visible.
399 template<
typename VT1
403 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
404 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
410 subAssign( ~lhs, ~rhs );
// smpSubAssign overload enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable. The body contains a direct subAssign() call and an
// hpxAssign() call that passes a subAssign() lambda as the per-chunk operation.
// NOTE(review): the condition selecting between the two calls is not visible
// in this excerpt.
434 template<
typename VT1
438 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
439 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
449 subAssign( ~lhs, ~rhs );
// Chunked path: hpxAssign invokes the lambda on each (target, source) pair.
452 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpMultAssign overload enabled when VT1 is a dense vector and at least one of
// VT1/VT2 is not SMP-assignable; it forwards directly to multAssign( ~lhs, ~rhs ).
// NOTE(review): excerpt only -- remaining template parameters and braces are not visible.
485 template<
typename VT1
489 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
490 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
496 multAssign( ~lhs, ~rhs );
// smpMultAssign overload enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable. The body contains a direct multAssign() call and an
// hpxAssign() call that passes a multAssign() lambda as the per-chunk operation.
// NOTE(review): the condition selecting between the two calls is not visible
// in this excerpt.
520 template<
typename VT1
524 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
525 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
535 multAssign( ~lhs, ~rhs );
// Chunked path: hpxAssign invokes the lambda on each (target, source) pair.
538 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
// smpDivAssign overload enabled when VT1 is a dense vector and at least one of
// VT1/VT2 is not SMP-assignable; it forwards directly to divAssign( ~lhs, ~rhs ).
// NOTE(review): excerpt only -- remaining template parameters and braces are not visible.
571 template<
typename VT1
575 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
576 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
582 divAssign( ~lhs, ~rhs );
// smpDivAssign overload enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable. The body contains a direct divAssign() call and an
// hpxAssign() call that passes a divAssign() lambda as the per-chunk operation.
// NOTE(review): the condition selecting between the two calls is not visible
// in this excerpt.
606 template<
typename VT1
610 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
611 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
621 divAssign( ~lhs, ~rhs );
// Chunked path: hpxAssign invokes the lambda on each (target, source) pair.
624 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization. This compilation switch enables/disables the paralleli...
Definition: SMP.h:96
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the serial section implementation.
Header file for the EnableIf class template.
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for SMP utility functions.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
Constraint on the data type.
Header file for the function trace functionality.