// The first line below carries the include guard of this header and, fused onto the
// same line, the start of the template header of the dense-vector threadAssign().
//
// threadAssign( DenseVector& lhs, const DenseVector& rhs, OP op )
// Splits the left-hand side vector into contiguous chunks (one per loop iteration),
// creates subvector views of both operands for each chunk, and passes each pair of
// views together with 'op' to TheThreadBackend::schedule(). After the loop it calls
// TheThreadBackend::wait().
// NOTE(review): this listing omits lines of the original file (braces, the remaining
// template parameters, the definition of 'threads', 'else' keywords). The comments
// below describe only what is visible here.
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_ 90 template<
typename VT1
95 void threadAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
// Element types of the left-hand and right-hand side vector types.
101 using ET1 = ElementType_t<VT1>;
102 using ET2 = ElementType_t<VT2>;
// Compile-time flag: true only if both vector types report simdEnabled and
// IsSIMDCombinable_v<ET1,ET2> holds.
104 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from SIMDTrait of the left-hand side element type.
105 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::
size );
// Run-time alignment of each operand, as reported by isAligned().
107 const bool lhsAligned( (~lhs).isAligned() );
108 const bool rhsAligned( (~rhs).isAligned() );
// addon is 1 when the vector size is not evenly divisible by 'threads', so that
// equalShare becomes the rounded-up quotient size/threads.
// NOTE(review): 'threads' is defined on a line not shown in this excerpt; presumably
// the number of threads of the backend - confirm against the full file.
111 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
112 const size_t equalShare ( (~lhs).
size() / threads + addon );
// rest = equalShare masked with (SIMDSIZE-1); this equals equalShare % SIMDSIZE
// assuming SIMDSIZE is a power of two (TODO confirm).
113 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero rest, the chunk size is raised to
// equalShare - rest + SIMDSIZE; otherwise it is equalShare.
114 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One iteration per thread index.
116 for(
size_t i=0UL; i<threads; ++i )
// Start offset of the i-th chunk.
118 const size_t index( i*sizePerThread );
// Chunk would start at or beyond the end of the vector.
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
120 if( index >= (~lhs).
size() )
// Length of this chunk, clamped to the number of elements remaining after 'index'.
123 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views for both.
125 if( simdEnabled && lhsAligned && rhsAligned ) {
126 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
127 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
128 TheThreadBackend::schedule( target, source, op );
// Case 2: SIMD enabled, only the left-hand side aligned -> aligned target, unaligned source.
130 else if( simdEnabled && lhsAligned ) {
131 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
132 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
133 TheThreadBackend::schedule( target, source, op );
// Case 3: SIMD enabled, only the right-hand side aligned -> unaligned target, aligned source.
135 else if( simdEnabled && rhsAligned ) {
136 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
137 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
138 TheThreadBackend::schedule( target, source, op );
// Remaining case: unaligned views for both operands.
// NOTE(review): the introducing 'else' line is not visible in this excerpt.
141 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
142 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
143 TheThreadBackend::schedule( target, source, op );
// Called after the scheduling loop.
147 TheThreadBackend::wait();
// threadAssign( DenseVector& lhs, const SparseVector& rhs, OP op )
// Overload for a sparse right-hand side. Splits the left-hand side vector into
// contiguous chunks, creates unaligned subvector views of both operands for each
// chunk, and passes them together with 'op' to TheThreadBackend::schedule().
// After the loop it calls TheThreadBackend::wait().
// NOTE(review): this listing omits lines of the original file (braces, the remaining
// template parameters, the definition of 'threads'). Comments describe only what is
// visible here.
171 template<
typename VT1
176 void threadAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// addon is 1 when the vector size is not evenly divisible by 'threads', making
// sizePerThread the rounded-up quotient size/threads.
// NOTE(review): 'threads' is defined on a line not shown in this excerpt.
183 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
184 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// One iteration per thread index.
186 for(
size_t i=0UL; i<threads; ++i )
// Start offset of the i-th chunk.
188 const size_t index( i*sizePerThread );
// Chunk would start at or beyond the end of the vector.
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
190 if( index >= (~lhs).
size() )
// Length of this chunk, clamped to the number of elements remaining after 'index'.
193 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both views are created as unaligned; no SIMD/alignment distinction is made here.
194 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
195 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
196 TheThreadBackend::schedule( target, source, op );
// Called after the scheduling loop.
199 TheThreadBackend::wait();
// smpAssign() overload that is enabled when VT1 is a dense vector and at least one
// of VT1 / VT2 is not SMP-assignable (see the EnableIf_t condition below).
// The visible body forwards to assign() with ~lhs and ~rhs.
// NOTE(review): braces and the remaining template parameters are not part of this excerpt.
231 template<
typename VT1
235 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
236 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
242 assign( ~lhs, ~rhs );
// smpAssign() overload that is enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable (see the EnableIf_t condition below).
// Two calls are visible: a plain assign( ~lhs, ~rhs ), and a threadAssign() call
// whose per-chunk operation is a lambda invoking assign( a, b ).
// NOTE(review): the condition that selects between the two calls is on lines not
// shown in this excerpt - confirm against the full file.
266 template<
typename VT1
270 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
271 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
283 assign( ~lhs, ~rhs );
286 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign() overload that is enabled when VT1 is a dense vector and at least one
// of VT1 / VT2 is not SMP-assignable (see the EnableIf_t condition below).
// The visible body forwards to addAssign() with ~lhs and ~rhs.
// NOTE(review): braces and the remaining template parameters are not part of this excerpt.
321 template<
typename VT1
325 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
326 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
332 addAssign( ~lhs, ~rhs );
// smpAddAssign() overload that is enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable (see the EnableIf_t condition below).
// Two calls are visible: a plain addAssign( ~lhs, ~rhs ), and a threadAssign() call
// whose per-chunk operation is a lambda invoking addAssign( a, b ).
// NOTE(review): the condition that selects between the two calls is on lines not
// shown in this excerpt - confirm against the full file.
356 template<
typename VT1
360 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
361 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
373 addAssign( ~lhs, ~rhs );
376 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign() overload that is enabled when VT1 is a dense vector and at least one
// of VT1 / VT2 is not SMP-assignable (see the EnableIf_t condition below).
// The visible body forwards to subAssign() with ~lhs and ~rhs.
// NOTE(review): braces and the remaining template parameters are not part of this excerpt.
411 template<
typename VT1
415 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
416 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
422 subAssign( ~lhs, ~rhs );
// smpSubAssign() overload that is enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable (see the EnableIf_t condition below).
// Two calls are visible: a plain subAssign( ~lhs, ~rhs ), and a threadAssign() call
// whose per-chunk operation is a lambda invoking subAssign( a, b ).
// NOTE(review): the condition that selects between the two calls is on lines not
// shown in this excerpt - confirm against the full file.
447 template<
typename VT1
451 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
452 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
464 subAssign( ~lhs, ~rhs );
467 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpMultAssign() overload that is enabled when VT1 is a dense vector and at least one
// of VT1 / VT2 is not SMP-assignable (see the EnableIf_t condition below).
// The visible body forwards to multAssign() with ~lhs and ~rhs.
// NOTE(review): braces and the remaining template parameters are not part of this excerpt.
502 template<
typename VT1
506 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
507 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
513 multAssign( ~lhs, ~rhs );
// smpMultAssign() overload that is enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable (see the EnableIf_t condition below).
// Two calls are visible: a plain multAssign( ~lhs, ~rhs ), and a threadAssign() call
// whose per-chunk operation is a lambda invoking multAssign( a, b ).
// NOTE(review): the condition that selects between the two calls is on lines not
// shown in this excerpt - confirm against the full file.
538 template<
typename VT1
542 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
543 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
555 multAssign( ~lhs, ~rhs );
558 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
// smpDivAssign() overload that is enabled when VT1 is a dense vector and at least one
// of VT1 / VT2 is not SMP-assignable (see the EnableIf_t condition below).
// The visible body forwards to divAssign() with ~lhs and ~rhs.
// NOTE(review): braces and the remaining template parameters are not part of this excerpt.
593 template<
typename VT1
597 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
598 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
604 divAssign( ~lhs, ~rhs );
// smpDivAssign() overload that is enabled when VT1 is a dense vector and both VT1 and
// VT2 are SMP-assignable (see the EnableIf_t condition below).
// Two calls are visible: a plain divAssign( ~lhs, ~rhs ), and a threadAssign() call
// whose per-chunk operation is a lambda invoking divAssign( a, b ).
// NOTE(review): the condition that selects between the two calls is on lines not
// shown in this excerpt - confirm against the full file.
628 template<
typename VT1
632 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
633 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
645 divAssign( ~lhs, ~rhs );
648 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:152
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:124
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.