#ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
#define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_

#include <blaze/math/Aliases.h>
#include <blaze/math/constraints/SMPAssignable.h>
#include <blaze/math/expressions/DenseVector.h>
#include <blaze/math/expressions/SparseVector.h>
#include <blaze/math/simd/SIMDTrait.h>
#include <blaze/math/smp/ParallelSection.h>
#include <blaze/math/smp/SerialSection.h>
#include <blaze/math/smp/threads/ThreadBackend.h>
#include <blaze/math/typetraits/IsDenseVector.h>
#include <blaze/math/typetraits/IsSIMDCombinable.h>
#include <blaze/math/typetraits/IsSMPAssignable.h>
#include <blaze/math/views/Subvector.h>
#include <blaze/system/SMP.h>
#include <blaze/util/algorithms/Min.h>
#include <blaze/util/Assert.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/FunctionTrace.h>
#include <blaze/util/mpl/And.h>
#include <blaze/util/mpl/Not.h>
#include <blaze/util/mpl/Or.h>
#include <blaze/util/StaticAssert.h>
#include <blaze/util/Types.h>

namespace blaze {

/*!\brief Backend of the SMP assignment of a dense vector to a dense vector.
//
// This function must NOT be called explicitly! It is used internally for the performance
// optimized evaluation of expression templates and assumes that a parallel section is active.
*/
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   // Distribute the vector evenly across the threads; with SIMD enabled, each chunk is
   // rounded up to the next multiple of SIMDSIZE so that chunk boundaries stay aligned.
   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Dispatch on the alignment of the two operands to enable aligned SIMD kernels
      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}

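// A worked example of the chunking arithmetic above (illustrative values only, not part of
// the original header): assume 4 threads, a vector of 1000 elements, and SIMDSIZE == 8.
//
//    addon         = ( 1000 % 4 != 0 )? 1 : 0   =   0
//    equalShare    = 1000 / 4 + 0               = 250
//    rest          = 250 & ( 8 - 1 )            =   2
//    sizePerThread = 250 - 2 + 8                = 256
//
// Threads 0-2 receive 256 elements starting at indices 0, 256, and 512; thread 3 receives
// the remaining min( 256, 1000 - 768 ) = 232 elements. Rounding every chunk up to a multiple
// of SIMDSIZE keeps each chunk start on a SIMD boundary relative to the start of the vector,
// which is what permits the aligned subvector views in the dispatch above.
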
/*!\brief Backend of the SMP assignment of a sparse vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}

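// Note on the sparse path above: a sparse right-hand side has no SIMD kernel, so the chunk
// size is not rounded up to a multiple of SIMDSIZE and only unaligned subvector views are
// used; each thread still operates on a contiguous, disjoint [index, index+size) range.
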
/*!\brief Default implementation of the SMP assignment of a vector to a dense vector.
// Selected whenever at least one of the two vector types is not SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   assign( ~lhs, ~rhs );   // serial assignment
}

/*!\brief Implementation of the SMP assignment of a vector to a dense vector.
// Selected when both vector types are SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         assign( ~lhs, ~rhs );              // serial fallback
      }
      else {
         smpAssign_backend( ~lhs, ~rhs );   // parallel backend
      }
   }
}

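// Minimal usage sketch (illustrative, not part of this header), assuming the C++11 thread
// backend has been enabled by compiling with BLAZE_USE_CPP_THREADS:
//
//    #include <blaze/Blaze.h>
//
//    int main()
//    {
//       blaze::setNumThreads( 4 );   // size of the worker thread pool
//
//       blaze::DynamicVector<double> a( 100000UL, 1.0 ), b( 100000UL, 2.0 ), c;
//       c = a + b;   // the assignment dispatches to smpAssign(), which either runs
//                    // serially or splits the work via smpAssign_backend()
//    }
//
// Whether the parallel path is actually taken depends on canSMPAssign() and the configured
// SMP thresholds; small vectors are typically assigned serially.
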
/*!\brief Backend of the SMP addition assignment of a dense vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}

/*!\brief Backend of the SMP addition assignment of a sparse vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}

/*!\brief Default implementation of the SMP addition assignment of a vector to a dense vector.
// Selected whenever at least one of the two vector types is not SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   addAssign( ~lhs, ~rhs );   // serial addition assignment
}

/*!\brief Implementation of the SMP addition assignment of a vector to a dense vector.
// Selected when both vector types are SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         addAssign( ~lhs, ~rhs );              // serial fallback
      }
      else {
         smpAddAssign_backend( ~lhs, ~rhs );   // parallel backend
      }
   }
}

/*!\brief Backend of the SMP subtraction assignment of a dense vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}

/*!\brief Backend of the SMP subtraction assignment of a sparse vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}

/*!\brief Default implementation of the SMP subtraction assignment of a vector to a dense vector.
// Selected whenever at least one of the two vector types is not SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   subAssign( ~lhs, ~rhs );   // serial subtraction assignment
}

/*!\brief Implementation of the SMP subtraction assignment of a vector to a dense vector.
// Selected when both vector types are SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         subAssign( ~lhs, ~rhs );              // serial fallback
      }
      else {
         smpSubAssign_backend( ~lhs, ~rhs );   // parallel backend
      }
   }
}

/*!\brief Backend of the SMP multiplication assignment of a dense vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}

/*!\brief Backend of the SMP multiplication assignment of a sparse vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}

/*!\brief Default implementation of the SMP multiplication assignment of a vector to a dense vector.
// Selected whenever at least one of the two vector types is not SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   multAssign( ~lhs, ~rhs );   // serial multiplication assignment
}

/*!\brief Implementation of the SMP multiplication assignment of a vector to a dense vector.
// Selected when both vector types are SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         multAssign( ~lhs, ~rhs );              // serial fallback
      }
      else {
         smpMultAssign_backend( ~lhs, ~rhs );   // parallel backend
      }
   }
}

/*!\brief Backend of the SMP division assignment of a dense vector to a dense vector.
// Internal use only; assumes an active parallel section. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}

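// Unlike the assignment, addition, subtraction, and multiplication kernels above, no sparse
// right-hand side overload follows here: an element-wise division by a sparse vector would
// divide by its zero-valued non-stored elements, so this operation is not defined.
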
/*!\brief Default implementation of the SMP division assignment of a vector to a dense vector.
// Selected whenever at least one of the two vector types is not SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   divAssign( ~lhs, ~rhs );   // serial division assignment
}

/*!\brief Implementation of the SMP division assignment of a vector to a dense vector.
// Selected when both vector types are SMP-assignable. */
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         divAssign( ~lhs, ~rhs );              // serial fallback
      }
      else {
         smpDivAssign_backend( ~lhs, ~rhs );   // parallel backend
      }
   }
}

// Either the C++11 or the Boost thread parallelization must be active for this header.
BLAZE_STATIC_ASSERT( BLAZE_CPP_THREADS_PARALLEL_MODE || BLAZE_BOOST_THREADS_PARALLEL_MODE );

} // namespace blaze

#endif