// Include guard and HPX parallel for_loop include, followed by hpxAssign() for a dense right-hand side.
// NOTE(review): this listing embeds the original line numbers in the text and omits lines (braces,
// further template parameters, the definition of `threads`); confirm details against the real header.
//
// hpxAssign( lhs, rhs, op ): walks the loop indices [0, threads), maps each index to a chunk of lhs,
// and applies the functor `op` to the matching subvector views of lhs and rhs. The loop is
// hpx::parallel::for_loop invoked with the `par` execution policy.
//   lhs - dense vector from which the writable target views are created
//   rhs - dense vector from which the read-only source views are created
//   op  - callable invoked as op( target, source ) once per chunk
35 #ifndef _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_HPX_DENSEVECTOR_H_ 43 #include <hpx/include/parallel_for_loop.hpp> 97 template<
typename VT1
// NOTE(review): TF1, VT2, TF2 and OP are used in the signature below; their declarations are not shown here.
102 void hpxAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
104 using hpx::parallel::for_loop;
105 using hpx::parallel::execution::par;
// Element types of both operands, used for the SIMD-combinability test below.
111 using ET1 = ElementType_<VT1>;
112 using ET2 = ElementType_<VT2>;
// SIMD is considered only if both vector types enable it and their element types are SIMD-combinable.
114 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
// SIMDTrait<...>::size for the left-hand side element type
// (presumably the number of elements per SIMD pack — TODO confirm).
115 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::
size );
// Run-time alignment of each operand; selects aligned vs. unaligned subvector views below.
117 const bool lhsAligned( (~lhs).isAligned() );
118 const bool rhsAligned( (~rhs).isAligned() );
// addon is 1 when the size of lhs is not evenly divisible by `threads`, so equalShare is the
// rounded-up quotient size / threads.
// NOTE(review): the definition of `threads` is not shown in this listing.
121 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare ( (~lhs).
size() / threads + addon );
// rest is equalShare masked with SIMDSIZE - 1; this equals equalShare % SIMDSIZE only when SIMDSIZE
// is a power of two (presumably guaranteed by SIMDTrait — confirm).
123 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero rest, the chunk size is raised to equalShare - rest + SIMDSIZE;
// otherwise equalShare is used unchanged.
124 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One lambda invocation per index in [0, threads); everything is captured by reference.
// NOTE(review): the loop bounds are size_t while the lambda parameter is int — confirm the
// conversion is intended.
126 for_loop( par,
size_t(0), threads, [&](
int i)
// First element of the chunk handled by this invocation.
128 const size_t index( i*sizePerThread );
// Test for a chunk start at or beyond the end of lhs; the statement guarded by this test is not
// shown in this listing.
130 if( index >= (~lhs).
size() )
// The final chunk may be shorter than sizePerThread.
133 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// SIMD enabled and both operands aligned: aligned views on both sides.
135 if( simdEnabled && lhsAligned && rhsAligned ) {
136 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
137 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
138 op( target, source );
// SIMD enabled and only lhs known to be aligned: aligned target, unaligned source.
140 else if( simdEnabled && lhsAligned ) {
141 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
142 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
143 op( target, source );
// SIMD enabled and only rhs known to be aligned: unaligned target, aligned source.
145 else if( simdEnabled && rhsAligned ) {
146 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
147 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
148 op( target, source );
// Remaining case (its introducing branch is not shown in this listing): unaligned views on both sides.
151 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
152 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
153 op( target, source );
// hpxAssign( lhs, rhs, op ) for a sparse right-hand side: walks the loop indices [0, threads), maps
// each index to a chunk of lhs, and applies the functor `op` to unaligned subvector views of lhs and
// rhs. The loop is hpx::parallel::for_loop invoked with the `par` execution policy. Unlike the dense
// overload, no SIMD or alignment handling is visible here.
//   lhs - dense vector from which the writable target views are created
//   rhs - sparse vector from which the read-only source views are created
//   op  - callable invoked as op( target, source ) once per chunk
// NOTE(review): this listing embeds the original line numbers and omits lines (braces, further
// template parameters, the definition of `threads`); confirm details against the real header.
178 template<
typename VT1
183 void hpxAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
185 using hpx::parallel::for_loop;
186 using hpx::parallel::execution::par;
// addon is 1 when the size of lhs is not evenly divisible by `threads`, so sizePerThread is the
// rounded-up quotient size / threads.
193 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
194 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// One lambda invocation per index in [0, threads); everything is captured by reference.
// NOTE(review): the loop bounds are size_t while the lambda parameter is int — confirm the
// conversion is intended.
196 for_loop( par,
size_t(0), threads, [&](
int i)
// First element of the chunk handled by this invocation.
198 const size_t index( i*sizePerThread );
// Bounds test on the chunk start against the size of lhs; the statement it guards is not shown
// in this listing.
200 if( index < (~lhs).
size() )
// The final chunk may be shorter than sizePerThread.
203 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both views are created as unaligned subvectors, with the `unchecked` token passed to the view creation.
204 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
205 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
206 op( target, source );
// smpAssign() overload enabled when VT1 is a dense vector and at least one of VT1 / VT2 is not
// SMP-assignable: the operation is forwarded to assign( ~lhs, ~rhs ).
// NOTE(review): the remaining template parameters and the function braces are not shown in this listing.
239 template<
typename VT1
243 inline EnableIf_< And< IsDenseVector<VT1>
244 , Or< Not< IsSMPAssignable<VT1> >
245 , Not< IsSMPAssignable<VT2> > > > >
246 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
252 assign( ~lhs, ~rhs );
// smpAssign() overload enabled when VT1 is a dense vector and both VT1 and VT2 are SMP-assignable.
// Two calls are visible: a plain assign( ~lhs, ~rhs ) and hpxAssign() with an Assign functor.
// NOTE(review): the condition that selects between the two calls, the remaining template parameters
// and the braces are not shown in this listing — confirm against the real header.
276 template<
typename VT1
280 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
281 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
291 assign( ~lhs, ~rhs );
294 hpxAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign() overload enabled when VT1 is a dense vector and at least one of VT1 / VT2 is not
// SMP-assignable: the operation is forwarded to addAssign( ~lhs, ~rhs ).
// NOTE(review): the remaining template parameters and the function braces are not shown in this listing.
327 template<
typename VT1
331 inline EnableIf_< And< IsDenseVector<VT1>
332 , Or< Not< IsSMPAssignable<VT1> >
333 , Not< IsSMPAssignable<VT2> > > > >
334 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
340 addAssign( ~lhs, ~rhs );
// smpAddAssign() overload enabled when VT1 is a dense vector and both VT1 and VT2 are SMP-assignable.
// Two calls are visible: a plain addAssign( ~lhs, ~rhs ) and hpxAssign() with an AddAssign functor.
// NOTE(review): the condition that selects between the two calls, the remaining template parameters
// and the braces are not shown in this listing — confirm against the real header.
364 template<
typename VT1
368 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
369 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
379 addAssign( ~lhs, ~rhs );
382 hpxAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign() overload enabled when VT1 is a dense vector and at least one of VT1 / VT2 is not
// SMP-assignable: the operation is forwarded to subAssign( ~lhs, ~rhs ).
// NOTE(review): the remaining template parameters and the function braces are not shown in this listing.
415 template<
typename VT1
419 inline EnableIf_< And< IsDenseVector<VT1>
420 , Or< Not< IsSMPAssignable<VT1> >
421 , Not< IsSMPAssignable<VT2> > > > >
422 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
428 subAssign( ~lhs, ~rhs );
// smpSubAssign() overload enabled when VT1 is a dense vector and both VT1 and VT2 are SMP-assignable.
// Two calls are visible: a plain subAssign( ~lhs, ~rhs ) and hpxAssign() with a SubAssign functor.
// NOTE(review): the condition that selects between the two calls, the remaining template parameters
// and the braces are not shown in this listing — confirm against the real header.
452 template<
typename VT1
456 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
457 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
467 subAssign( ~lhs, ~rhs );
470 hpxAssign( ~lhs, ~rhs, SubAssign() );
// smpMultAssign() overload enabled when VT1 is a dense vector and at least one of VT1 / VT2 is not
// SMP-assignable: the operation is forwarded to multAssign( ~lhs, ~rhs ).
// NOTE(review): the remaining template parameters and the function braces are not shown in this listing.
503 template<
typename VT1
507 inline EnableIf_< And< IsDenseVector<VT1>
508 , Or< Not< IsSMPAssignable<VT1> >
509 , Not< IsSMPAssignable<VT2> > > > >
510 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
516 multAssign( ~lhs, ~rhs );
// smpMultAssign() overload enabled when VT1 is a dense vector and both VT1 and VT2 are SMP-assignable.
// Two calls are visible: a plain multAssign( ~lhs, ~rhs ) and hpxAssign() with a MultAssign functor.
// NOTE(review): the condition that selects between the two calls, the remaining template parameters
// and the braces are not shown in this listing — confirm against the real header.
540 template<
typename VT1
544 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
545 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
555 multAssign( ~lhs, ~rhs );
558 hpxAssign( ~lhs, ~rhs, MultAssign() );
// smpDivAssign() overload enabled when VT1 is a dense vector and at least one of VT1 / VT2 is not
// SMP-assignable: the operation is forwarded to divAssign( ~lhs, ~rhs ).
// NOTE(review): the remaining template parameters and the function braces are not shown in this listing.
591 template<
typename VT1
595 inline EnableIf_< And< IsDenseVector<VT1>
596 , Or< Not< IsSMPAssignable<VT1> >
597 , Not< IsSMPAssignable<VT2> > > > >
598 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
604 divAssign( ~lhs, ~rhs );
// smpDivAssign() overload enabled when VT1 is a dense vector and both VT1 and VT2 are SMP-assignable.
// Two calls are visible: a plain divAssign( ~lhs, ~rhs ) and hpxAssign() with a DivAssign functor.
// NOTE(review): the condition that selects between the two calls, the remaining template parameters
// and the braces are not shown in this listing — confirm against the real header.
628 template<
typename VT1
632 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
633 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
643 divAssign( ~lhs, ~rhs );
646 hpxAssign( ~lhs, ~rhs, DivAssign() );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
Header file for the Assign functor.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the AddAssign functor.
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization. This compilation switch enables/disables the paralleli...
Definition: SMP.h:148
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the EnableIf class template.
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the MultAssign functor.
Header file for SMP utility functions.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
Header file for the DivAssign functor.
Constraint on the data type.
Header file for the function trace functionality.