35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
92 template<
typename VT1
96 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
102 typedef ElementType_<VT1> ET1;
103 typedef ElementType_<VT2> ET2;
104 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
105 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
107 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
109 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
110 const bool lhsAligned ( (~lhs).isAligned() );
111 const bool rhsAligned ( (~rhs).isAligned() );
114 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
115 const size_t equalShare ( (~lhs).
size() / threads + addon );
116 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
117 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
119 for(
size_t i=0UL; i<threads; ++i )
121 const size_t index( i*sizePerThread );
123 if( index >= (~lhs).
size() )
126 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
128 if( simdEnabled && lhsAligned && rhsAligned ) {
129 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
130 TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index,
size ) );
132 else if( simdEnabled && lhsAligned ) {
133 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
134 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
136 else if( simdEnabled && rhsAligned ) {
137 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
138 TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index,
size ) );
141 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
142 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
146 TheThreadBackend::wait();
169 template<
typename VT1
173 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
179 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
182 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
183 const size_t sizePerThread( (~lhs).
size() / threads + addon );
185 for(
size_t i=0UL; i<threads; ++i )
187 const size_t index( i*sizePerThread );
189 if( index >= (~lhs).
size() )
192 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
193 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
194 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
197 TheThreadBackend::wait();
221 template<
typename VT1
225 inline EnableIf_< And< IsDenseVector<VT1>
226 , Or< Not< IsSMPAssignable<VT1> >
227 , Not< IsSMPAssignable<VT2> > > > >
228 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
234 assign( ~lhs, ~rhs );
258 template<
typename VT1
262 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
263 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
275 assign( ~lhs, ~rhs );
278 smpAssign_backend( ~lhs, ~rhs );
311 template<
typename VT1
315 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
321 typedef ElementType_<VT1> ET1;
322 typedef ElementType_<VT2> ET2;
323 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
324 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
326 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
328 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
329 const bool lhsAligned ( (~lhs).isAligned() );
330 const bool rhsAligned ( (~rhs).isAligned() );
333 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
334 const size_t equalShare ( (~lhs).
size() / threads + addon );
335 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
336 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
338 for(
size_t i=0UL; i<threads; ++i )
340 const size_t index( i*sizePerThread );
342 if( index >= (~lhs).
size() )
345 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
347 if( simdEnabled && lhsAligned && rhsAligned ) {
348 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
349 TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index,
size ) );
351 else if( simdEnabled && lhsAligned ) {
352 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
353 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
355 else if( simdEnabled && rhsAligned ) {
356 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
357 TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index,
size ) );
360 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
361 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
365 TheThreadBackend::wait();
388 template<
typename VT1
392 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
398 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
401 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
402 const size_t sizePerThread( (~lhs).
size() / threads + addon );
404 for(
size_t i=0UL; i<threads; ++i )
406 const size_t index( i*sizePerThread );
408 if( index >= (~lhs).
size() )
411 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
412 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
413 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
416 TheThreadBackend::wait();
441 template<
typename VT1
445 inline EnableIf_< And< IsDenseVector<VT1>
446 , Or< Not< IsSMPAssignable<VT1> >
447 , Not< IsSMPAssignable<VT2> > > > >
448 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
454 addAssign( ~lhs, ~rhs );
478 template<
typename VT1
482 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
483 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
495 addAssign( ~lhs, ~rhs );
498 smpAddAssign_backend( ~lhs, ~rhs );
531 template<
typename VT1
535 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
541 typedef ElementType_<VT1> ET1;
542 typedef ElementType_<VT2> ET2;
543 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
544 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
546 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
548 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
549 const bool lhsAligned ( (~lhs).isAligned() );
550 const bool rhsAligned ( (~rhs).isAligned() );
553 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
554 const size_t equalShare ( (~lhs).
size() / threads + addon );
555 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
556 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
558 for(
size_t i=0UL; i<threads; ++i )
560 const size_t index( i*sizePerThread );
562 if( index >= (~lhs).
size() )
565 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
567 if( simdEnabled && lhsAligned && rhsAligned ) {
568 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
569 TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index,
size ) );
571 else if( simdEnabled && lhsAligned ) {
572 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
573 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
575 else if( simdEnabled && rhsAligned ) {
576 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
577 TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index,
size ) );
580 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
581 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
585 TheThreadBackend::wait();
608 template<
typename VT1
612 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
618 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
621 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
622 const size_t sizePerThread( (~lhs).
size() / threads + addon );
624 for(
size_t i=0UL; i<threads; ++i )
626 const size_t index( i*sizePerThread );
628 if( index >= (~lhs).
size() )
631 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
632 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
633 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
636 TheThreadBackend::wait();
661 template<
typename VT1
665 inline EnableIf_< And< IsDenseVector<VT1>
666 , Or< Not< IsSMPAssignable<VT1> >
667 , Not< IsSMPAssignable<VT2> > > > >
668 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
674 subAssign( ~lhs, ~rhs );
699 template<
typename VT1
703 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
704 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
716 subAssign( ~lhs, ~rhs );
719 smpSubAssign_backend( ~lhs, ~rhs );
752 template<
typename VT1
756 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
762 typedef ElementType_<VT1> ET1;
763 typedef ElementType_<VT2> ET2;
764 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
765 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
767 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
769 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
770 const bool lhsAligned ( (~lhs).isAligned() );
771 const bool rhsAligned ( (~rhs).isAligned() );
774 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
775 const size_t equalShare ( (~lhs).
size() / threads + addon );
776 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
777 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
779 for(
size_t i=0UL; i<threads; ++i )
781 const size_t index( i*sizePerThread );
783 if( index >= (~lhs).
size() )
786 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
788 if( simdEnabled && lhsAligned && rhsAligned ) {
789 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
790 TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index,
size ) );
792 else if( simdEnabled && lhsAligned ) {
793 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
794 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
796 else if( simdEnabled && rhsAligned ) {
797 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
798 TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index,
size ) );
801 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
802 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
806 TheThreadBackend::wait();
829 template<
typename VT1
833 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
839 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
842 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
843 const size_t sizePerThread( (~lhs).
size() / threads + addon );
845 for(
size_t i=0UL; i<threads; ++i )
847 const size_t index( i*sizePerThread );
849 if( index >= (~lhs).
size() )
852 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
853 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
854 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
857 TheThreadBackend::wait();
882 template<
typename VT1
886 inline EnableIf_< And< IsDenseVector<VT1>
887 , Or< Not< IsSMPAssignable<VT1> >
888 , Not< IsSMPAssignable<VT2> > > > >
889 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
895 multAssign( ~lhs, ~rhs );
920 template<
typename VT1
924 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
925 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
937 multAssign( ~lhs, ~rhs );
940 smpMultAssign_backend( ~lhs, ~rhs );
973 template<
typename VT1
977 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
983 typedef ElementType_<VT1> ET1;
984 typedef ElementType_<VT2> ET2;
985 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
986 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
988 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
990 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
991 const bool lhsAligned ( (~lhs).isAligned() );
992 const bool rhsAligned ( (~rhs).isAligned() );
995 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
996 const size_t equalShare ( (~lhs).
size() / threads + addon );
997 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
998 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
1000 for(
size_t i=0UL; i<threads; ++i )
1002 const size_t index( i*sizePerThread );
1004 if( index >= (~lhs).
size() )
1007 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
1009 if( simdEnabled && lhsAligned && rhsAligned ) {
1010 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
1011 TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index,
size ) );
1013 else if( simdEnabled && lhsAligned ) {
1014 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
1015 TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
1017 else if( simdEnabled && rhsAligned ) {
1018 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
1019 TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index,
size ) );
1022 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
1023 TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
1027 TheThreadBackend::wait();
1052 template<
typename VT1
1056 inline EnableIf_< And< IsDenseVector<VT1>
1057 , Or< Not< IsSMPAssignable<VT1> >
1058 , Not< IsSMPAssignable<VT2> > > > >
1059 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
1065 divAssign( ~lhs, ~rhs );
1089 template<
typename VT1
1093 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1094 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
1106 divAssign( ~lhs, ~rhs );
1109 smpDivAssign_backend( ~lhs, ~rhs );
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the parallelization based on Boost threads.
Definition: SMP.h:122
Header file for the IsSame and IsStrictlySame type traits.
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (i.e. can be assigned by multiple threads), a compilation error is created.
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the parallelization based on C++11 threads.
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the C++11 and Boost thread backend.
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.