// Include guard of this header, followed by the dense-vector overload of threadAssign().
// NOTE(review): the numbers embedded in the lines below (35, 36, 98, 103, ...) skip values, so
// this listing seems to omit lines (e.g. further template parameters, braces) - confirm
// against the original header before relying on it.
//
// threadAssign( lhs, rhs, op ): splits the left-hand side dense vector into one contiguous
// range per thread, hands 'op' together with the matching subvectors of lhs and rhs to
// TheThreadBackend::schedule(), and finally calls TheThreadBackend::wait().
//   lhs - target dense vector
//   rhs - source dense vector
//   op  - operation object passed on to TheThreadBackend::schedule() for every range
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_ 98 template<
typename VT1
103 void threadAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
// Element types of the two vector types.
109 using ET1 = ElementType_<VT1>;
110 using ET2 = ElementType_<VT2>;
// Compile time flag: true only if both vector types report simdEnabled and their element
// types are SIMD combinable.
112 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
// Value of SIMDTrait<...>::size for the left-hand side element type.
113 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::
size );
// Run time alignment state of both operands; selects the subvector flavor further below.
115 const bool lhsAligned( (~lhs).isAligned() );
116 const bool rhsAligned( (~rhs).isAligned() );
// NOTE(review): 'threads' is declared on a line that is not part of this listing.
// addon is 1 if the vector size is not evenly divisible by 'threads', so that equalShare
// becomes the rounded-up quotient size/threads.
119 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
120 const size_t equalShare ( (~lhs).
size() / threads + addon );
// Remainder of equalShare with respect to SIMDSIZE (bit mask form; assumes SIMDSIZE is a
// power of two - TODO confirm).
121 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero remainder, the per-thread size is raised to the next
// multiple of SIMDSIZE; otherwise equalShare is used unchanged.
122 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One iteration per thread.
124 for(
size_t i=0UL; i<threads; ++i )
// First element of the range handled in iteration i.
126 const size_t index( i*sizePerThread );
// Range would start at or beyond the end of the vector.
// NOTE(review): the statement executed in this case is not visible in this listing.
128 if( index >= (~lhs).
size() )
// Number of elements in this range, clipped to what is left of the vector.
131 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Case 1: SIMD enabled, both operands aligned -> aligned subvectors on both sides.
133 if( simdEnabled && lhsAligned && rhsAligned ) {
134 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
135 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
136 TheThreadBackend::schedule( target, source, op );
// Case 2: SIMD enabled, only the left-hand side is aligned.
138 else if( simdEnabled && lhsAligned ) {
139 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
140 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
141 TheThreadBackend::schedule( target, source, op );
// Case 3: SIMD enabled, only the right-hand side is aligned.
143 else if( simdEnabled && rhsAligned ) {
144 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
145 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
146 TheThreadBackend::schedule( target, source, op );
// Case 4: unaligned subvectors on both sides.
// NOTE(review): presumably the final 'else' branch; the 'else' itself is not visible here.
149 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
150 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
151 TheThreadBackend::schedule( target, source, op );
// Called once after all ranges have been scheduled (presumably blocks until the scheduled
// work has finished - verify against the thread backend).
155 TheThreadBackend::wait();
// threadAssign( lhs, rhs, op ) for a sparse right-hand side vector: splits the left-hand side
// dense vector into one contiguous range per thread, hands 'op' together with the matching
// unaligned subvectors of lhs and rhs to TheThreadBackend::schedule(), and finally calls
// TheThreadBackend::wait().
// NOTE(review): the embedded line numbers skip values, so parts of the definition (further
// template parameters, braces, the declaration of 'threads') are not visible in this listing.
179 template<
typename VT1
184 void threadAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// addon is 1 if the vector size is not evenly divisible by 'threads', which makes
// sizePerThread the rounded-up quotient size/threads.
191 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
192 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// One iteration per thread.
194 for(
size_t i=0UL; i<threads; ++i )
// First element of the range handled in iteration i.
196 const size_t index( i*sizePerThread );
// Range would start at or beyond the end of the vector.
// NOTE(review): the statement executed in this case is not visible in this listing.
198 if( index >= (~lhs).
size() )
// Number of elements in this range, clipped to what is left of the vector.
201 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both sides always use unaligned subvectors in this overload.
202 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
203 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
204 TheThreadBackend::schedule( target, source, op );
// Called once after all ranges have been scheduled (presumably blocks until the scheduled
// work has finished - verify against the thread backend).
207 TheThreadBackend::wait();
// smpAssign( lhs, rhs ) - fallback overload.
// Enabled when VT1 is a dense vector type and at least one of VT1 and VT2 is not SMP
// assignable; it forwards directly to assign().
// NOTE(review): the embedded line numbers skip values; the remaining template parameters and
// the function braces are not visible in this listing.
239 template<
typename VT1
243 inline EnableIf_< And< IsDenseVector<VT1>
244 , Or< Not< IsSMPAssignable<VT1> >
245 , Not< IsSMPAssignable<VT2> > > > >
246 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
252 assign( ~lhs, ~rhs );
// smpAssign( lhs, rhs ) - overload for SMP assignable operands.
// Enabled when VT1 is a dense vector type and both VT1 and VT2 are SMP assignable. The
// visible body contains a plain assign() call and a threadAssign() call with an Assign
// functor.
// NOTE(review): the condition that selects between the two calls is not visible in this
// listing (embedded line numbers jump from 281 to 293 to 296) - confirm against the header.
276 template<
typename VT1
280 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
281 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
293 assign( ~lhs, ~rhs );
296 threadAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign( lhs, rhs ) - fallback overload.
// Enabled when VT1 is a dense vector type and at least one of VT1 and VT2 is not SMP
// assignable; it forwards directly to addAssign().
// NOTE(review): the embedded line numbers skip values; the remaining template parameters and
// the function braces are not visible in this listing.
331 template<
typename VT1
335 inline EnableIf_< And< IsDenseVector<VT1>
336 , Or< Not< IsSMPAssignable<VT1> >
337 , Not< IsSMPAssignable<VT2> > > > >
338 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
344 addAssign( ~lhs, ~rhs );
// smpAddAssign( lhs, rhs ) - overload for SMP assignable operands.
// Enabled when VT1 is a dense vector type and both VT1 and VT2 are SMP assignable. The
// visible body contains a plain addAssign() call and a threadAssign() call with an AddAssign
// functor.
// NOTE(review): the condition that selects between the two calls is not visible in this
// listing (embedded line numbers jump from 373 to 385 to 388) - confirm against the header.
368 template<
typename VT1
372 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
373 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
385 addAssign( ~lhs, ~rhs );
388 threadAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign( lhs, rhs ) - fallback overload.
// Enabled when VT1 is a dense vector type and at least one of VT1 and VT2 is not SMP
// assignable; it forwards directly to subAssign().
// NOTE(review): the embedded line numbers skip values; the remaining template parameters and
// the function braces are not visible in this listing.
423 template<
typename VT1
427 inline EnableIf_< And< IsDenseVector<VT1>
428 , Or< Not< IsSMPAssignable<VT1> >
429 , Not< IsSMPAssignable<VT2> > > > >
430 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
436 subAssign( ~lhs, ~rhs );
// smpSubAssign( lhs, rhs ) - overload for SMP assignable operands.
// Enabled when VT1 is a dense vector type and both VT1 and VT2 are SMP assignable. The
// visible body contains a plain subAssign() call and a threadAssign() call with a SubAssign
// functor.
// NOTE(review): the condition that selects between the two calls is not visible in this
// listing (embedded line numbers jump from 466 to 478 to 481) - confirm against the header.
461 template<
typename VT1
465 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
466 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
478 subAssign( ~lhs, ~rhs );
481 threadAssign( ~lhs, ~rhs, SubAssign() );
// smpMultAssign( lhs, rhs ) - fallback overload.
// Enabled when VT1 is a dense vector type and at least one of VT1 and VT2 is not SMP
// assignable; it forwards directly to multAssign().
// NOTE(review): the embedded line numbers skip values; the remaining template parameters and
// the function braces are not visible in this listing.
516 template<
typename VT1
520 inline EnableIf_< And< IsDenseVector<VT1>
521 , Or< Not< IsSMPAssignable<VT1> >
522 , Not< IsSMPAssignable<VT2> > > > >
523 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
529 multAssign( ~lhs, ~rhs );
// smpMultAssign( lhs, rhs ) - overload for SMP assignable operands.
// Enabled when VT1 is a dense vector type and both VT1 and VT2 are SMP assignable. The
// visible body contains a plain multAssign() call and a threadAssign() call with a MultAssign
// functor.
// NOTE(review): the condition that selects between the two calls is not visible in this
// listing (embedded line numbers jump from 559 to 571 to 574) - confirm against the header.
554 template<
typename VT1
558 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
559 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
571 multAssign( ~lhs, ~rhs );
574 threadAssign( ~lhs, ~rhs, MultAssign() );
// smpDivAssign( lhs, rhs ) - fallback overload.
// Enabled when VT1 is a dense vector type and at least one of VT1 and VT2 is not SMP
// assignable; it forwards directly to divAssign().
// NOTE(review): the embedded line numbers skip values; the remaining template parameters and
// the function braces are not visible in this listing.
609 template<
typename VT1
613 inline EnableIf_< And< IsDenseVector<VT1>
614 , Or< Not< IsSMPAssignable<VT1> >
615 , Not< IsSMPAssignable<VT2> > > > >
616 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
622 divAssign( ~lhs, ~rhs );
// smpDivAssign( lhs, rhs ) - overload for SMP assignable operands.
// Enabled when VT1 is a dense vector type and both VT1 and VT2 are SMP assignable. The
// visible body contains a plain divAssign() call and a threadAssign() call with a DivAssign
// functor.
// NOTE(review): the condition that selects between the two calls is not visible in this
// listing (embedded line numbers jump from 651 to 663 to 666) - confirm against the header.
646 template<
typename VT1
650 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
651 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
663 divAssign( ~lhs, ~rhs );
666 threadAssign( ~lhs, ~rhs, DivAssign() );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
Header file for the Assign functor.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the AddAssign functor.
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the MultAssign functor.
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the DivAssign functor.
Constraint on the data type.
Header file for the function trace functionality.