#ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
#define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
/*!\brief Backend of the SMP assignment of a dense vector to a dense vector.
//
// Splits the target vector into per-thread chunks and schedules one assignment per chunk on
// the active thread backend, using aligned subvector views whenever both operands allow it.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side dense vector
        , bool TF2 >     // Transpose flag of the right-hand side dense vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType                         ET1;
   typedef typename VT2::ElementType                         ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}
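/* Illustrative sketch (not part of the original header): the chunk-size arithmetic used above,
   isolated into a standalone snippet. The vector size, thread count and SIMD width below are
   hypothetical values; they only demonstrate how addon, equalShare, rest and sizePerThread
   interact, i.e. that each thread's share is rounded up to a multiple of the SIMD width
   whenever the assignment is vectorizable.

      #include <algorithm>
      #include <cstddef>
      #include <iostream>

      int main()
      {
         const std::size_t N      ( 1000UL );  // hypothetical vector size
         const std::size_t threads( 4UL );     // hypothetical number of worker threads
         const std::size_t simd   ( 4UL );     // hypothetical SIMD width (IT::size), a power of two
         const bool vectorizable  ( true );

         const std::size_t addon        ( ( N % threads != 0UL )? 1UL : 0UL );   // 0
         const std::size_t equalShare   ( N / threads + addon );                 // 250
         const std::size_t rest         ( equalShare & ( simd - 1UL ) );         // 2
         const std::size_t sizePerThread( ( vectorizable && rest )
                                          ?( equalShare - rest + simd )          // 252
                                          :( equalShare ) );

         for( std::size_t i=0UL; i<threads; ++i ) {
            const std::size_t index( i*sizePerThread );
            if( index >= N ) continue;
            const std::size_t size( std::min( sizePerThread, N - index ) );
            std::cout << "thread " << i << ": [" << index << ", " << index+size << ")\n";
         }
      }

   With these values the chunks are [0,252), [252,504), [504,756) and [756,1000): only the last
   chunk is shorter, so the SIMD rounding never causes an access past the end of the vector. */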
/*!\brief Backend of the SMP assignment of a sparse vector to a dense vector.
//
// Splits the target vector into per-thread chunks; since the right-hand side is sparse, all
// chunks are accessed through unaligned subvector views.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side sparse vector
        , bool TF2 >     // Transpose flag of the right-hand side sparse vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}
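/* Usage sketch (an assumption for illustration, not part of the original header): the sparse
   overload above is chosen when the right-hand side is a sparse vector, for instance when a
   CompressedVector is copied into a DynamicVector. Because the sparse operand cannot be
   SIMD-vectorized here, every chunk is accessed through an unaligned subvector view.

      #include <blaze/Math.h>

      int main()
      {
         blaze::DynamicVector<double>    dense ( 100000UL );
         blaze::CompressedVector<double> sparse( 100000UL );
         sparse[12]    = 1.0;
         sparse[47110] = 2.0;

         // With thread-based parallelization enabled (BLAZE_CPP_THREADS_PARALLEL_MODE or
         // BLAZE_BOOST_THREADS_PARALLEL_MODE) and more than one thread configured, this
         // assignment ends up in smpAssign() and is split into per-thread chunks.
         dense = sparse;
      }
*/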
/*!\brief Default SMP assignment of a vector to a dense vector. Selected whenever at least one
   of the two operands is not SMP-assignable; the assignment is then performed serially. */
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   assign( ~lhs, ~rhs );
}
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() )
         assign( ~lhs, ~rhs );
      else
         smpAssign_backend( ~lhs, ~rhs );
   }
}
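/* Usage sketch (an assumption for illustration, not part of the original header): user code
   never calls smpAssign() directly; it is invoked by the assignment operators of the vector
   classes. The snippet shows a plain dense assignment that takes the parallel path when both
   operands are SMP-assignable, and how a BLAZE_SERIAL_SECTION forces the serial fallback via
   isSerialSectionActive().

      #include <blaze/Math.h>

      int main()
      {
         blaze::DynamicVector<double> a( 1000000UL ), b( 1000000UL, 2.0 );

         a = b;  // dispatched to smpAssign(); split across threads if SMP is enabled

         BLAZE_SERIAL_SECTION
         {
            a = b;  // executed serially even if SMP is enabled
         }
      }
*/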
/*!\brief Backend of the SMP addition assignment of a dense vector to a dense vector.
//
// Splits the target vector into per-thread chunks and schedules one addition assignment per
// chunk on the active thread backend, using aligned subvector views whenever possible.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side dense vector
        , bool TF2 >     // Transpose flag of the right-hand side dense vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType                         ET1;
   typedef typename VT2::ElementType                         ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}
/*!\brief Backend of the SMP addition assignment of a sparse vector to a dense vector.
//
// Splits the target vector into per-thread chunks; since the right-hand side is sparse, all
// chunks are accessed through unaligned subvector views.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side sparse vector
        , bool TF2 >     // Transpose flag of the right-hand side sparse vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}
/*!\brief Default SMP addition assignment of a vector to a dense vector. Selected whenever at
   least one of the two operands is not SMP-assignable; the addition is then performed serially. */
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   addAssign( ~lhs, ~rhs );
}
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() )
         addAssign( ~lhs, ~rhs );
      else
         smpAddAssign_backend( ~lhs, ~rhs );
   }
}
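/* Usage sketch (an assumption for illustration, not part of the original header): the addition
   assignment operators of the dense vector classes forward to smpAddAssign(), which splits the
   work exactly like smpAssign() but schedules a scheduleAddAssign() task per chunk.

      #include <blaze/Math.h>

      int main()
      {
         blaze::DynamicVector<double> a( 1000000UL, 1.0 ), b( 1000000UL, 2.0 );
         a += b;  // dispatched to smpAddAssign(); each chunk adds the matching subvector of b
      }
*/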
/*!\brief Backend of the SMP subtraction assignment of a dense vector to a dense vector.
//
// Splits the target vector into per-thread chunks and schedules one subtraction assignment per
// chunk on the active thread backend, using aligned subvector views whenever possible.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side dense vector
        , bool TF2 >     // Transpose flag of the right-hand side dense vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType                         ET1;
   typedef typename VT2::ElementType                         ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}
/*!\brief Backend of the SMP subtraction assignment of a sparse vector to a dense vector.
//
// Splits the target vector into per-thread chunks; since the right-hand side is sparse, all
// chunks are accessed through unaligned subvector views.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side sparse vector
        , bool TF2 >     // Transpose flag of the right-hand side sparse vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}
/*!\brief Default SMP subtraction assignment of a vector to a dense vector. Selected whenever at
   least one of the two operands is not SMP-assignable; the subtraction is then performed serially. */
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   subAssign( ~lhs, ~rhs );
}
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() )
         subAssign( ~lhs, ~rhs );
      else
         smpSubAssign_backend( ~lhs, ~rhs );
   }
}
/*!\brief Backend of the SMP multiplication assignment of a dense vector to a dense vector.
//
// Splits the target vector into per-thread chunks and schedules one multiplication assignment
// per chunk on the active thread backend, using aligned subvector views whenever possible.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side dense vector
        , bool TF2 >     // Transpose flag of the right-hand side dense vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType                         ET1;
   typedef typename VT2::ElementType                         ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }

   TheThreadBackend::wait();
}
/*!\brief Backend of the SMP multiplication assignment of a sparse vector to a dense vector.
//
// Splits the target vector into per-thread chunks; since the right-hand side is sparse, all
// chunks are accessed through unaligned subvector views.
*/
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side sparse vector
        , bool TF2 >     // Transpose flag of the right-hand side sparse vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const size_t threads      ( TheThreadBackend::size() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   for( size_t i=0UL; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }

   TheThreadBackend::wait();
}
/*!\brief Default SMP multiplication assignment of a vector to a dense vector. Selected whenever at
   least one of the two operands is not SMP-assignable; the multiplication is then performed serially. */
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   multAssign( ~lhs, ~rhs );
}
template< typename VT1   // Type of the left-hand side dense vector
        , bool TF1       // Transpose flag of the left-hand side dense vector
        , typename VT2   // Type of the right-hand side vector
        , bool TF2 >     // Transpose flag of the right-hand side vector
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() )
         multAssign( ~lhs, ~rhs );
      else
         smpMultAssign_backend( ~lhs, ~rhs );
   }
}
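/* Usage sketch (an assumption for illustration, not part of the original header): the
   subtraction and multiplication assignments follow the same dispatch pattern as smpAssign()
   and smpAddAssign(), differing only in the per-chunk operation that is scheduled
   (scheduleSubAssign()/scheduleMultAssign()).

      #include <blaze/Math.h>

      int main()
      {
         blaze::DynamicVector<double> a( 1000000UL, 3.0 ), b( 1000000UL, 2.0 );
         a -= b;  // dispatched to smpSubAssign()
         a *= b;  // dispatched to smpMultAssign(), i.e. a componentwise multiplication
      }
*/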