35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
// Backend of the SMP assignment of a dense vector to a dense vector:
// splits the target into one chunk per backend thread and schedules a
// subvector-to-subvector assignment for each chunk.
// NOTE(review): this block is a lossy documentation extraction -- the
// leading numbers on each line are the original file's line numbers fused
// in by the extractor, and several lines (remaining template parameters,
// braces, the 'else' before the final branch) are missing. Not compilable
// as-is; restore from the original header before editing logic.
89 template<
typename VT1
93 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
// Intrinsic (SIMD) trait of the target element type; IT::size is the
// number of elements per intrinsic vector.
101 typedef IntrinsicTrait<typename VT1::ElementType> IT;
102 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
103 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// Vectorized kernels are only usable if both operands are vectorizable
// and share the same element type.
105 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
106 const bool lhsAligned ( (~lhs).isAligned() );
107 const bool rhsAligned ( (~rhs).isAligned() );
// Per-thread chunk size: 'addon' rounds the share up for non-divisible
// sizes, and for vectorizable types the share is additionally rounded up
// to a multiple of the intrinsic width (the '& (IT::size-1UL)' mask
// presumes IT::size is a power of two).
109 const size_t threads ( TheThreadBackend::size() );
110 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
111 const size_t equalShare ( (~lhs).size() / threads + addon );
112 const size_t rest ( equalShare & ( IT::size - 1UL ) );
113 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
// Schedule one subvector assignment per thread ...
115 for(
size_t i=0UL; i<threads; ++i )
117 const size_t index( i*sizePerThread );
// Rounding the share up can leave trailing threads without any work.
119 if( index >= (~lhs).size() )
122 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Dispatch on the alignment of both operands so that aligned subvector
// views (and thus aligned SIMD accesses) are used whenever possible.
124 if( vectorizable && lhsAligned && rhsAligned ) {
125 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
126 TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
128 else if( vectorizable && lhsAligned ) {
129 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
130 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
132 else if( vectorizable && rhsAligned ) {
133 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
134 TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
137 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
138 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// ... then block until all scheduled tasks have completed.
142 TheThreadBackend::wait();
// Backend of the SMP assignment of a sparse vector to a dense vector.
// Only unaligned subvector targets are used here and no SIMD-width
// rounding is applied (no IntrinsicTrait/alignment dispatch appears for
// the sparse right-hand side).
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters and braces are missing.
165 template<
typename VT1
169 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
177 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// One rounded-up chunk per available backend thread.
179 const size_t threads ( TheThreadBackend::size() );
180 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
181 const size_t sizePerThread( (~lhs).size() / threads + addon );
183 for(
size_t i=0UL; i<threads; ++i )
185 const size_t index( i*sizePerThread );
// Trailing threads may have no elements left to process.
187 if( index >= (~lhs).size() )
190 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
191 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
192 TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Wait for all scheduled assignments to finish.
195 TheThreadBackend::wait();
// Serial variant of smpAssign(): selected via DisableIf when at least one
// operand is not SMP-assignable.
// NOTE(review): the function body is missing from this extraction; only
// the signature is visible.
219 template<
typename VT1
223 inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
224 smpAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
// SMP variant of smpAssign(): selected via EnableIf when both operands
// are SMP-assignable; delegates the actual work to smpAssign_backend().
// NOTE(review): surrounding statements (asserts, parallel-section guards)
// are missing from this extraction.
254 template<
typename VT1
258 inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
259 smpAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
274 smpAssign_backend( ~lhs, ~rhs );
// Backend of the SMP addition assignment (lhs += rhs) of a dense vector
// to a dense vector: one SIMD-width-rounded chunk per thread, with the
// aligned/unaligned subvector kernel chosen per operand alignment.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters, braces and the final 'else' keyword
// are missing. Not compilable as-is.
307 template<
typename VT1
311 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
319 typedef IntrinsicTrait<typename VT1::ElementType> IT;
320 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
321 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// SIMD kernels require vectorizable operands with identical element types.
323 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
324 const bool lhsAligned ( (~lhs).isAligned() );
325 const bool rhsAligned ( (~rhs).isAligned() );
// Per-thread chunk size, rounded up to the intrinsic width when vectorized.
327 const size_t threads ( TheThreadBackend::size() );
328 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
329 const size_t equalShare ( (~lhs).size() / threads + addon );
330 const size_t rest ( equalShare & ( IT::size - 1UL ) );
331 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
333 for(
size_t i=0UL; i<threads; ++i )
335 const size_t index( i*sizePerThread );
// Rounding up may leave trailing threads without work.
337 if( index >= (~lhs).size() )
340 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment dispatch: prefer aligned views on whichever side allows it.
342 if( vectorizable && lhsAligned && rhsAligned ) {
343 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
344 TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
346 else if( vectorizable && lhsAligned ) {
347 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
348 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
350 else if( vectorizable && rhsAligned ) {
351 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
352 TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
355 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
356 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Block until every scheduled addition assignment has completed.
360 TheThreadBackend::wait();
// Backend of the SMP addition assignment of a sparse vector to a dense
// vector: unaligned subvector targets only, no SIMD-width rounding.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters and braces are missing.
383 template<
typename VT1
387 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
395 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// One rounded-up chunk per backend thread.
397 const size_t threads ( TheThreadBackend::size() );
398 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
399 const size_t sizePerThread( (~lhs).size() / threads + addon );
401 for(
size_t i=0UL; i<threads; ++i )
403 const size_t index( i*sizePerThread );
// Trailing threads may have no elements left to process.
405 if( index >= (~lhs).size() )
408 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
409 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
410 TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Wait for all scheduled addition assignments to finish.
413 TheThreadBackend::wait();
// Serial variant of smpAddAssign(): selected via DisableIf when at least
// one operand is not SMP-assignable.
// NOTE(review): the function body is missing from this extraction.
438 template<
typename VT1
442 inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
443 smpAddAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
// SMP variant of smpAddAssign(): selected via EnableIf when both operands
// are SMP-assignable; delegates to smpAddAssign_backend().
// NOTE(review): surrounding statements are missing from this extraction.
473 template<
typename VT1
477 inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
478 smpAddAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
493 smpAddAssign_backend( ~lhs, ~rhs );
// Backend of the SMP subtraction assignment (lhs -= rhs) of a dense
// vector to a dense vector: one SIMD-width-rounded chunk per thread, with
// the aligned/unaligned subvector kernel chosen per operand alignment.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters, braces and the final 'else' keyword
// are missing. Not compilable as-is.
526 template<
typename VT1
530 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
538 typedef IntrinsicTrait<typename VT1::ElementType> IT;
539 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
540 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// SIMD kernels require vectorizable operands with identical element types.
542 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
543 const bool lhsAligned ( (~lhs).isAligned() );
544 const bool rhsAligned ( (~rhs).isAligned() );
// Per-thread chunk size, rounded up to the intrinsic width when vectorized.
546 const size_t threads ( TheThreadBackend::size() );
547 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
548 const size_t equalShare ( (~lhs).size() / threads + addon );
549 const size_t rest ( equalShare & ( IT::size - 1UL ) );
550 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
552 for(
size_t i=0UL; i<threads; ++i )
554 const size_t index( i*sizePerThread );
// Rounding up may leave trailing threads without work.
556 if( index >= (~lhs).size() )
559 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment dispatch: prefer aligned views on whichever side allows it.
561 if( vectorizable && lhsAligned && rhsAligned ) {
562 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
563 TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
565 else if( vectorizable && lhsAligned ) {
566 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
567 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
569 else if( vectorizable && rhsAligned ) {
570 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
571 TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
574 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
575 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Block until every scheduled subtraction assignment has completed.
579 TheThreadBackend::wait();
// Backend of the SMP subtraction assignment of a sparse vector to a dense
// vector: unaligned subvector targets only, no SIMD-width rounding.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters and braces are missing.
602 template<
typename VT1
606 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
614 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// One rounded-up chunk per backend thread.
616 const size_t threads ( TheThreadBackend::size() );
617 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
618 const size_t sizePerThread( (~lhs).size() / threads + addon );
620 for(
size_t i=0UL; i<threads; ++i )
622 const size_t index( i*sizePerThread );
// Trailing threads may have no elements left to process.
624 if( index >= (~lhs).size() )
627 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
628 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
629 TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Wait for all scheduled subtraction assignments to finish.
632 TheThreadBackend::wait();
// Serial variant of smpSubAssign(): selected via DisableIf when at least
// one operand is not SMP-assignable.
// NOTE(review): the function body is missing from this extraction.
657 template<
typename VT1
661 inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
662 smpSubAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
// SMP variant of smpSubAssign(): selected via EnableIf when both operands
// are SMP-assignable; delegates to smpSubAssign_backend().
// NOTE(review): surrounding statements are missing from this extraction.
693 template<
typename VT1
697 inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
698 smpSubAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
713 smpSubAssign_backend( ~lhs, ~rhs );
// Backend of the SMP multiplication assignment (lhs *= rhs, elementwise)
// of a dense vector to a dense vector: one SIMD-width-rounded chunk per
// thread, with the aligned/unaligned subvector kernel chosen per operand
// alignment.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters, braces and the final 'else' keyword
// are missing. Not compilable as-is.
746 template<
typename VT1
750 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
758 typedef IntrinsicTrait<typename VT1::ElementType> IT;
759 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
760 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// SIMD kernels require vectorizable operands with identical element types.
762 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
763 const bool lhsAligned ( (~lhs).isAligned() );
764 const bool rhsAligned ( (~rhs).isAligned() );
// Per-thread chunk size, rounded up to the intrinsic width when vectorized.
766 const size_t threads ( TheThreadBackend::size() );
767 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
768 const size_t equalShare ( (~lhs).size() / threads + addon );
769 const size_t rest ( equalShare & ( IT::size - 1UL ) );
770 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
772 for(
size_t i=0UL; i<threads; ++i )
774 const size_t index( i*sizePerThread );
// Rounding up may leave trailing threads without work.
776 if( index >= (~lhs).size() )
779 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment dispatch: prefer aligned views on whichever side allows it.
781 if( vectorizable && lhsAligned && rhsAligned ) {
782 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
783 TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
785 else if( vectorizable && lhsAligned ) {
786 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
787 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
789 else if( vectorizable && rhsAligned ) {
790 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
791 TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
794 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
795 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Block until every scheduled multiplication assignment has completed.
799 TheThreadBackend::wait();
// Backend of the SMP multiplication assignment of a sparse vector to a
// dense vector: unaligned subvector targets only, no SIMD-width rounding.
// NOTE(review): lossy extraction -- leading numbers are original file
// line numbers; template parameters and braces are missing.
822 template<
typename VT1
826 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
834 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// One rounded-up chunk per backend thread.
836 const size_t threads ( TheThreadBackend::size() );
837 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
838 const size_t sizePerThread( (~lhs).size() / threads + addon );
840 for(
size_t i=0UL; i<threads; ++i )
842 const size_t index( i*sizePerThread );
// Trailing threads may have no elements left to process.
844 if( index >= (~lhs).size() )
847 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
848 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
849 TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
// Wait for all scheduled multiplication assignments to finish.
852 TheThreadBackend::wait();
// Serial variant of smpMultAssign(): selected via DisableIf when at least
// one operand is not SMP-assignable.
// NOTE(review): the function body is missing from this extraction.
877 template<
typename VT1
881 inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
882 smpMultAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
// SMP variant of smpMultAssign(): selected via EnableIf when both
// operands are SMP-assignable; delegates to smpMultAssign_backend().
// NOTE(review): surrounding statements are missing from this extraction.
913 template<
typename VT1
917 inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
918 smpMultAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
933 smpMultAssign_backend( ~lhs, ~rhs );
Header file for mathematical functions.
Header file for the SparseVector base class.
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
Header file for the complete DenseSubvector implementation.
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
Header file for the IsSame and IsStrictlySame type traits.
Header file for the And class template.
Header file for the DenseVector base class.
Header file for the intrinsic trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:118
Header file for the complete SparseSubvector implementation.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Header file for the DisableIf class template.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:245
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:212
Header file for run time assertion macros.
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:212
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the C++11 and Boost thread backend.
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.