35#ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
36#define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
// Thread-backend assignment of a dense vector to a dense vector.
//
// Splits `lhs` into contiguous chunks (one per loop iteration, at most `threads` of them),
// builds matching subvector views of `lhs` and `rhs`, and hands each (target, source) pair
// together with `op` to TheThreadBackend::schedule(). `op` is the assignment functor applied
// to each pair of views.
//
// NOTE(review): this listing omits the template header, the braces, the definition of
// `threads`, and several other lines — the comments below describe only what is visible.
96void threadAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
102 using ET1 = ElementType_t<VT1>;
103 using ET2 = ElementType_t<VT2>;
// SIMD-sized chunking is only used if both vector types enable SIMD and their element
// types are SIMD-combinable.
105 constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
106 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >
::size );
// Run-time alignment of both operands; selects the aligned/unaligned subvector flavor below.
108 const bool lhsAligned( (*lhs).isAligned() );
109 const bool rhsAligned( (*rhs).isAligned() );
// `threads` is defined on a line missing from this listing — presumably the number of
// threads of the backend; TODO confirm.
// equalShare = ceil( size / threads ).
112 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
113 const size_t equalShare ( (*lhs).size() / threads + addon );
// `rest` equals equalShare % SIMDSIZE provided SIMDSIZE is a power of two (bit-mask trick).
114 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled, round the chunk size up to the next multiple of SIMDSIZE so every
// chunk except possibly the last starts on a SIMD-sized boundary.
115 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
117 for(
size_t i=0UL; i<threads; ++i )
// Start offset of the i-th chunk.
119 const size_t index( i*sizePerThread );
// Chunk would start past the end of the vector. The guarded statement is missing from this
// listing — presumably the chunk is skipped; TODO confirm.
121 if( index >= (*lhs).size() )
// The last chunk may be shorter than sizePerThread.
124 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
// Case 1: both operands aligned -> aligned views on both sides.
126 if( simdEnabled && lhsAligned && rhsAligned ) {
127 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
128 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
129 TheThreadBackend::schedule( target, source, op );
// Case 2: only the left-hand side is aligned.
131 else if( simdEnabled && lhsAligned ) {
132 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
133 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
134 TheThreadBackend::schedule( target, source, op );
// Case 3: only the right-hand side is aligned.
136 else if( simdEnabled && rhsAligned ) {
137 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
138 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
139 TheThreadBackend::schedule( target, source, op );
// Case 4 (the introducing `else` is missing from this listing): unaligned views on both sides.
142 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
143 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
144 TheThreadBackend::schedule( target, source, op );
// Last visible statement — presumably blocks until all scheduled tasks have finished;
// verify against the thread backend.
148 TheThreadBackend::wait();
// Thread-backend assignment of a sparse vector to a dense vector.
//
// Splits `lhs` into contiguous chunks of ceil( size / threads ) elements, builds unaligned
// subvector views of `lhs` and `rhs` for each chunk and schedules `op` on every
// (target, source) pair via TheThreadBackend::schedule(). Unlike the dense/dense overload
// there is no SIMD rounding and no alignment distinction.
//
// NOTE(review): the template header is truncated in this listing, and the braces and the
// definition of `threads` are missing.
172template<
typename VT1
177void threadAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// sizePerThread = ceil( size / threads ); `threads` is defined on a line not shown here —
// presumably the backend's thread count; TODO confirm.
184 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
185 const size_t sizePerThread( (*lhs).size() / threads + addon );
187 for(
size_t i=0UL; i<threads; ++i )
// Start offset of the i-th chunk.
189 const size_t index( i*sizePerThread );
// Chunk would start past the end of the vector. The guarded statement is missing from this
// listing — presumably the chunk is skipped; TODO confirm.
191 if( index >= (*lhs).size() )
// The last chunk may be shorter than sizePerThread.
194 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
195 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
196 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
197 TheThreadBackend::schedule( target, source, op );
// Last visible statement — presumably blocks until all scheduled tasks have finished;
// verify against the thread backend.
200 TheThreadBackend::wait();
// SMP assignment of a vector to a dense vector — fallback overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and at least
// one of VT1 / VT2 is NOT SMP-assignable. The only visible statement forwards to the plain
// assign() of the two operands.
// NOTE(review): the template header is truncated and the braces are missing in this listing.
232template<
typename VT1
236inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
237 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
243 assign( *lhs, *rhs );
// SMP assignment of a vector to a dense vector — thread-parallel overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and both
// VT1 and VT2 are SMP-assignable. Two alternative paths are visible: a direct assign() and a
// chunked threadAssign() whose per-chunk operation is assign().
// NOTE(review): the condition choosing between the two paths, the braces and most of the
// template header are missing from this listing — check the full source for the exact rule.
267template<
typename VT1
271inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
272 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-threaded path.
284 assign( *lhs, *rhs );
// Threaded path: each scheduled chunk pair (a, b) is processed with assign( a, b ).
287 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// SMP addition assignment of a vector to a dense vector — fallback overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and at least
// one of VT1 / VT2 is NOT SMP-assignable. The only visible statement forwards to the plain
// addAssign() of the two operands.
// NOTE(review): the template header is truncated and the braces are missing in this listing.
322template<
typename VT1
326inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
327 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
333 addAssign( *lhs, *rhs );
// SMP addition assignment of a vector to a dense vector — thread-parallel overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and both
// VT1 and VT2 are SMP-assignable. Two alternative paths are visible: a direct addAssign()
// and a chunked threadAssign() whose per-chunk operation is addAssign().
// NOTE(review): the condition choosing between the two paths, the braces and most of the
// template header are missing from this listing — check the full source for the exact rule.
357template<
typename VT1
361inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
362 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-threaded path.
374 addAssign( *lhs, *rhs );
// Threaded path: each scheduled chunk pair (a, b) is processed with addAssign( a, b ).
377 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// SMP subtraction assignment of a vector to a dense vector — fallback overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and at least
// one of VT1 / VT2 is NOT SMP-assignable. The only visible statement forwards to the plain
// subAssign() of the two operands.
// NOTE(review): the template header is truncated and the braces are missing in this listing.
412template<
typename VT1
416inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
417 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
423 subAssign( *lhs, *rhs );
// SMP subtraction assignment of a vector to a dense vector — thread-parallel overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and both
// VT1 and VT2 are SMP-assignable. Two alternative paths are visible: a direct subAssign()
// and a chunked threadAssign() whose per-chunk operation is subAssign().
// NOTE(review): the condition choosing between the two paths, the braces and most of the
// template header are missing from this listing — check the full source for the exact rule.
448template<
typename VT1
452inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
453 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-threaded path.
465 subAssign( *lhs, *rhs );
// Threaded path: each scheduled chunk pair (a, b) is processed with subAssign( a, b ).
468 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// SMP multiplication assignment of a vector to a dense vector — fallback overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and at least
// one of VT1 / VT2 is NOT SMP-assignable. The only visible statement forwards to the plain
// multAssign() of the two operands.
// NOTE(review): the template header is truncated and the braces are missing in this listing.
503template<
typename VT1
507inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
508 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
514 multAssign( *lhs, *rhs );
// SMP multiplication assignment of a vector to a dense vector — thread-parallel overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and both
// VT1 and VT2 are SMP-assignable. Two alternative paths are visible: a direct multAssign()
// and a chunked threadAssign() whose per-chunk operation is multAssign().
// NOTE(review): the condition choosing between the two paths, the braces and most of the
// template header are missing from this listing — check the full source for the exact rule.
539template<
typename VT1
543inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
544 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-threaded path.
556 multAssign( *lhs, *rhs );
// Threaded path: each scheduled chunk pair (a, b) is processed with multAssign( a, b ).
559 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
// SMP division assignment of a vector to a dense vector — fallback overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and at least
// one of VT1 / VT2 is NOT SMP-assignable. The only visible statement forwards to the plain
// divAssign() of the two operands.
// NOTE(review): the template header is truncated and the braces are missing in this listing.
594template<
typename VT1
598inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
599 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
605 divAssign( *lhs, *rhs );
// SMP division assignment of a vector to a dense vector — thread-parallel overload.
//
// Selected (via the EnableIf_t trailing return type) when VT1 is a dense vector and both
// VT1 and VT2 are SMP-assignable. Two alternative paths are visible: a direct divAssign()
// and a chunked threadAssign() whose per-chunk operation is divAssign().
// NOTE(review): the condition choosing between the two paths, the braces and most of the
// template header are missing from this listing — check the full source for the exact rule.
629template<
typename VT1
633inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
634 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-threaded path.
646 divAssign( *lhs, *rhs );
// Threaded path: each scheduled chunk pair (a, b) is processed with divAssign( a, b ).
649 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseVector type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Deactivation of problematic macros.
Header file for the parallel section implementation.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the C++11 and Boost thread backend.
Header file for the DenseVector base class.
Header file for the SparseVector base class.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:221
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP assignment of a vector to a dense vector.
Definition: DenseVector.h:105
auto smpAddAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP addition assignment of a vector to a dense vector.
Definition: DenseVector.h:134
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.
Definition: ParallelSection.h:254
auto smpSubAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP subtraction assignment of a vector to a dense vector.
Definition: DenseVector.h:163
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization.
Definition: SMP.h:124
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization.
Definition: SMP.h:152
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Subvector view.