#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
#define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_

//*************************************************************************************************
// Includes
//
// NOTE: This include block is reconstructed from the header's own file descriptions; the exact
// paths are assumptions and may differ between Blaze versions.
//*************************************************************************************************

#include <omp.h>
#include <blaze/math/constraints/SMPAssignable.h>
#include <blaze/math/expressions/DenseVector.h>
#include <blaze/math/expressions/SparseVector.h>
#include <blaze/math/Functions.h>
#include <blaze/math/intrinsics/IntrinsicTrait.h>
#include <blaze/math/smp/ParallelSection.h>
#include <blaze/math/smp/SerialSection.h>
#include <blaze/math/traits/SubvectorExprTrait.h>
#include <blaze/math/typetraits/IsSMPAssignable.h>
#include <blaze/math/views/DenseSubvector.h>
#include <blaze/math/views/SparseSubvector.h>
#include <blaze/system/SMP.h>
#include <blaze/util/Assert.h>
#include <blaze/util/DisableIf.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/logging/FunctionTrace.h>
#include <blaze/util/mpl/And.h>
#include <blaze/util/typetraits/IsSame.h>


namespace blaze {
//*************************************************************************************************
// Backend of the OpenMP-based SMP assignment of a dense vector to a dense vector. This
// function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side dense vector
        , bool TF2 >    // Transpose flag of the right-hand side dense vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename VT1::ElementType                          ET1;
   typedef typename VT2::ElementType                          ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>          IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type     AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type   UnalignedTarget;

   // Vectorization requires both operands to support intrinsics and to share an element type
   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   // Equal chunks per thread; vectorizable chunks are padded to a multiple of the SIMD width
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Use aligned subvectors wherever the operands' alignment allows it
      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
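//*************************************************************************************************
// Worked example of the chunking arithmetic above (hypothetical values, not from the original
// source): a vector of size 1000, 4 threads, and a SIMD width of IT::size == 4 yield
//
//    addon         = ( 1000 % 4 != 0 )? 1 : 0   // = 0
//    equalShare    = 1000 / 4 + 0               // = 250
//    rest          = 250 & ( 4 - 1 )            // = 2
//    sizePerThread = 250 - 2 + 4                // = 252 (vectorizable case)
//
// Threads 0..2 each process 252 elements and thread 3 the remaining 244. Rounding every chunk
// up to a multiple of the SIMD width keeps each chunk boundary at a multiple of IT::size, so
// an aligned first element implies aligned chunks for the vectorized kernels.
//*************************************************************************************************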
//*************************************************************************************************
// Backend of the OpenMP-based SMP assignment of a sparse vector to a dense vector. This
// function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side sparse vector
        , bool TF2 >    // Transpose flag of the right-hand side sparse vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   // Equal chunks per thread; no SIMD padding, since sparse operands are not vectorizable
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      assign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
//*************************************************************************************************
// Default (serial) SMP assignment of a vector to a dense vector, selected in case at least one
// of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   assign( ~lhs, ~rhs );
}
//*************************************************************************************************
// OpenMP-based SMP assignment of a vector to a dense vector, selected in case both operands
// are SMP-assignable. Falls back to serial execution inside serial sections and for operands
// that cannot be SMP-assigned.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      assign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION {
#pragma omp parallel shared( lhs, rhs )
         smpAssign_backend( ~lhs, ~rhs );
      }
   }
}
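//*************************************************************************************************
// Usage sketch (illustrative, with assumed sizes): when BLAZE_OPENMP_PARALLEL_MODE is active
// and both operands are SMP-assignable, a plain assignment dispatches to the parallel overload
// above, which forks an OpenMP team that executes smpAssign_backend():
//
//    blaze::DynamicVector<double> a( 100000UL ), b( 100000UL, 2.0 );
//    omp_set_num_threads( 4 );  // team size used by '#pragma omp parallel'
//    a = b;                     // internally calls smpAssign( a, b )
//*************************************************************************************************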
//*************************************************************************************************
// Backend of the OpenMP-based SMP addition assignment of a dense vector to a dense vector.
// This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side dense vector
        , bool TF2 >    // Transpose flag of the right-hand side dense vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename VT1::ElementType                          ET1;
   typedef typename VT2::ElementType                          ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>          IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type     AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type   UnalignedTarget;

   // Vectorization requires both operands to support intrinsics and to share an element type
   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   // Equal chunks per thread; vectorizable chunks are padded to a multiple of the SIMD width
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Use aligned subvectors wherever the operands' alignment allows it
      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
//*************************************************************************************************
// Backend of the OpenMP-based SMP addition assignment of a sparse vector to a dense vector.
// This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side sparse vector
        , bool TF2 >    // Transpose flag of the right-hand side sparse vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   // Equal chunks per thread; no SIMD padding, since sparse operands are not vectorizable
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
//*************************************************************************************************
// Default (serial) SMP addition assignment of a vector to a dense vector, selected in case at
// least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpAddAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   addAssign( ~lhs, ~rhs );
}
//*************************************************************************************************
// OpenMP-based SMP addition assignment of a vector to a dense vector, selected in case both
// operands are SMP-assignable. Falls back to serial execution inside serial sections and for
// operands that cannot be SMP-assigned.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpAddAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      addAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION {
#pragma omp parallel shared( lhs, rhs )
         smpAddAssign_backend( ~lhs, ~rhs );
      }
   }
}
//*************************************************************************************************
// Backend of the OpenMP-based SMP subtraction assignment of a dense vector to a dense vector.
// This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side dense vector
        , bool TF2 >    // Transpose flag of the right-hand side dense vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename VT1::ElementType                          ET1;
   typedef typename VT2::ElementType                          ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>          IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type     AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type   UnalignedTarget;

   // Vectorization requires both operands to support intrinsics and to share an element type
   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   // Equal chunks per thread; vectorizable chunks are padded to a multiple of the SIMD width
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Use aligned subvectors wherever the operands' alignment allows it
      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
//*************************************************************************************************
// Backend of the OpenMP-based SMP subtraction assignment of a sparse vector to a dense vector.
// This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side sparse vector
        , bool TF2 >    // Transpose flag of the right-hand side sparse vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   // Equal chunks per thread; no SIMD padding, since sparse operands are not vectorizable
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
//*************************************************************************************************
// Default (serial) SMP subtraction assignment of a vector to a dense vector, selected in case
// at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpSubAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   subAssign( ~lhs, ~rhs );
}
//*************************************************************************************************
// OpenMP-based SMP subtraction assignment of a vector to a dense vector, selected in case both
// operands are SMP-assignable. Falls back to serial execution inside serial sections and for
// operands that cannot be SMP-assigned.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpSubAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      subAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION {
#pragma omp parallel shared( lhs, rhs )
         smpSubAssign_backend( ~lhs, ~rhs );
      }
   }
}
//*************************************************************************************************
// Backend of the OpenMP-based SMP multiplication assignment of a dense vector to a dense
// vector. This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side dense vector
        , bool TF2 >    // Transpose flag of the right-hand side dense vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename VT1::ElementType                          ET1;
   typedef typename VT2::ElementType                          ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>          IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type     AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type   UnalignedTarget;

   // Vectorization requires both operands to support intrinsics and to share an element type
   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   // Equal chunks per thread; vectorizable chunks are padded to a multiple of the SIMD width
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Use aligned subvectors wherever the operands' alignment allows it
      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
//*************************************************************************************************
// Backend of the OpenMP-based SMP multiplication assignment of a sparse vector to a dense
// vector. This function must only be called from within an OpenMP parallel section.
template< typename VT1  // Type of the left-hand side dense vector
        , bool TF1      // Transpose flag of the left-hand side dense vector
        , typename VT2  // Type of the right-hand side sparse vector
        , bool TF2 >    // Transpose flag of the right-hand side sparse vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   // Equal chunks per thread; no SIMD padding, since sparse operands are not vectorizable
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
//*************************************************************************************************
// Default (serial) SMP multiplication assignment of a vector to a dense vector, selected in
// case at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename DisableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpMultAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   multAssign( ~lhs, ~rhs );
}
//*************************************************************************************************
// OpenMP-based SMP multiplication assignment of a vector to a dense vector, selected in case
// both operands are SMP-assignable. Falls back to serial execution inside serial sections and
// for operands that cannot be SMP-assigned.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >::Type
   smpMultAssign( DenseVector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      multAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION {
#pragma omp parallel shared( lhs, rhs )
         smpMultAssign_backend( ~lhs, ~rhs );
      }
   }
}
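//*************************************************************************************************
// The add/sub/mult dispatchers above are reached through the corresponding compound assignment
// operators; an illustrative sketch (assumed sizes):
//
//    blaze::DynamicVector<double> a( 100000UL, 1.0 ), b( 100000UL, 2.0 );
//    a += b;  // internally calls smpAddAssign( a, b )
//    a -= b;  // internally calls smpSubAssign( a, b )
//    a *= b;  // componentwise product, internally calls smpMultAssign( a, b )
//*************************************************************************************************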
} // namespace blaze

#endif