35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
//**********************************************************************************************
// Default (serial fallback) SMP assignment of a vector to a dense vector. This generic
// overload is selected when no SMP-specialized overload applies; presumably it forwards to
// the serial assign() kernel -- the body is not visible in this excerpt.
// NOTE(review): the template parameter list and the function body are truncated here
// (original source lines 85-113 are missing); restore them from the original header before
// modifying this function.
//**********************************************************************************************
84 template<
typename VT1
88 inline void smpAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
//**********************************************************************************************
// OpenMP-based SMP assignment of a dense vector to a dense vector. Enabled via SFINAE
// (EnableIfTrue) only when both vector types declare themselves SMP-assignable; otherwise
// the serial default overload is chosen by overload resolution.
// NOTE(review): this excerpt is missing interior source lines (the full template parameter
// list, the ET1/ET2 element-type typedefs used below, assertions, and several braces);
// restore them from the original header before editing the code itself.
//**********************************************************************************************
114 template<
typename VT1
118 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
119 smpAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
// IT describes the intrinsic (SIMD) characteristics of the element type; the aligned and
// unaligned target types are subvector views into the left-hand side vector.
132 typedef IntrinsicTrait<typename VT1::ElementType> IT;
133 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
134 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// Vectorized kernels are usable only if both operands are vectorizable and share the same
// element type (ET1/ET2 -- their typedefs are among the missing lines); operand alignment
// is queried at run time.
136 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
137 const bool lhsAligned ( (~lhs).isAligned() );
138 const bool rhsAligned ( (~rhs).isAligned() );
140 #pragma omp parallel shared( lhs, rhs )
// Work partitioning: each thread handles one contiguous chunk of ceil(size/threads)
// elements. For the vectorized path the chunk size is rounded up to the next multiple of
// IT::size (the '& (IT::size - 1UL)' mask assumes IT::size is a power of two -- confirm)
// so that every chunk boundary preserves alignment.
142 const int threads ( omp_get_num_threads() );
143 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
144 const size_t equalShare ( (~lhs).size() / threads + addon );
145 const size_t rest ( equalShare & ( IT::size - 1UL ) );
146 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
// One iteration per chunk; 'nowait' drops the implicit barrier since chunks are independent.
148 #pragma omp for schedule(dynamic,1) nowait
149 for(
int i=0UL; i<threads; ++i )
151 const size_t index( i*sizePerThread );
// Rounding the chunk size up may leave trailing iterations without any work.
153 if( index >= (~lhs).size() )
// The final chunk may be shorter than sizePerThread.
156 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Dispatch on the run-time alignment of both operands to pick aligned/unaligned views.
158 if( vectorizable && lhsAligned && rhsAligned ) {
159 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
160 assign( target, subvector<aligned>( ~rhs, index, size ) );
162 else if( vectorizable && lhsAligned ) {
163 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
164 assign( target, subvector<unaligned>( ~rhs, index, size ) );
166 else if( vectorizable && rhsAligned ) {
167 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
168 assign( target, subvector<aligned>( ~rhs, index, size ) );
// Fallback branch: fully unaligned / non-vectorizable chunk assignment (the enclosing
// 'else' line is among the missing source lines).
171 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
172 assign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// OpenMP-based SMP assignment of a sparse vector to a dense vector. Enabled via SFINAE only
// when both vector types are SMP-assignable. Unlike the dense-dense overload there is no
// vectorized path: the sparse operand cannot be SIMD-assigned, so only unaligned subvector
// views are used and the per-thread chunk is a plain ceil(size/threads).
// NOTE(review): interior source lines (template parameter list, assertions, braces) are
// missing from this excerpt; restore them before editing the code itself.
//**********************************************************************************************
196 template<
typename VT1
200 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
201 smpAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
214 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
216 #pragma omp parallel shared( lhs, rhs )
// ceil(size/threads) elements per thread; no SIMD rounding needed for the sparse path.
218 const int threads ( omp_get_num_threads() );
219 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
220 const size_t sizePerThread( (~lhs).size() / threads + addon );
222 #pragma omp for schedule(dynamic,1) nowait
223 for(
int i=0UL; i<threads; ++i )
225 const size_t index( i*sizePerThread );
// Skip iterations whose chunk starts past the end of the vector.
227 if( index >= (~lhs).size() )
230 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
231 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
232 assign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// Default (serial fallback) SMP addition assignment of a vector to a dense vector. Selected
// when no SMP-specialized overload applies; presumably forwards to the serial addAssign()
// kernel -- the body is not visible in this excerpt.
// NOTE(review): the template parameter list and the function body are truncated here;
// restore them from the original header before modifying this function.
//**********************************************************************************************
257 template<
typename VT1
261 inline void smpAddAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
//**********************************************************************************************
// OpenMP-based SMP addition assignment of a dense vector to a dense vector. Enabled via
// SFINAE only when both vector types are SMP-assignable. Structure mirrors the OpenMP
// smpAssign overload, but each chunk is combined with addAssign() instead of assign().
// NOTE(review): interior source lines (template parameter list, ET1/ET2 typedefs used
// below, assertions, braces) are missing from this excerpt; restore them before editing.
//**********************************************************************************************
289 template<
typename VT1
293 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
294 smpAddAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
// SIMD traits of the element type plus aligned/unaligned subvector view types into lhs.
307 typedef IntrinsicTrait<typename VT1::ElementType> IT;
308 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
309 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// Vectorization requires both operands vectorizable with identical element types; operand
// alignment is a run-time property.
311 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
312 const bool lhsAligned ( (~lhs).isAligned() );
313 const bool rhsAligned ( (~rhs).isAligned() );
315 #pragma omp parallel shared( lhs, rhs )
// ceil(size/threads) per thread, rounded up to a multiple of IT::size on the vectorized
// path so chunk boundaries preserve alignment (mask assumes IT::size is a power of two).
317 const int threads ( omp_get_num_threads() );
318 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
319 const size_t equalShare ( (~lhs).size() / threads + addon );
320 const size_t rest ( equalShare & ( IT::size - 1UL ) );
321 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
// One independent chunk per iteration; 'nowait' drops the implicit end-of-loop barrier.
323 #pragma omp for schedule(dynamic,1) nowait
324 for(
int i=0UL; i<threads; ++i )
326 const size_t index( i*sizePerThread );
// Rounded-up chunks can push trailing iterations past the end -- skip them.
328 if( index >= (~lhs).size() )
331 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment-based dispatch to the best available subvector views.
333 if( vectorizable && lhsAligned && rhsAligned ) {
334 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
335 addAssign( target, subvector<aligned>( ~rhs, index, size ) );
337 else if( vectorizable && lhsAligned ) {
338 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
339 addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
341 else if( vectorizable && rhsAligned ) {
342 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
343 addAssign( target, subvector<aligned>( ~rhs, index, size ) );
// Fallback: fully unaligned / non-vectorizable chunk (its 'else' line is missing here).
346 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
347 addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// OpenMP-based SMP addition assignment of a sparse vector to a dense vector. Enabled via
// SFINAE only when both vector types are SMP-assignable. No vectorized path for the sparse
// operand: only unaligned views, with a plain ceil(size/threads) chunking.
// NOTE(review): interior source lines are missing from this excerpt; restore them before
// editing the code itself.
//**********************************************************************************************
373 template<
typename VT1
377 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
378 smpAddAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
391 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
393 #pragma omp parallel shared( lhs, rhs )
395 const int threads ( omp_get_num_threads() );
396 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
397 const size_t sizePerThread( (~lhs).size() / threads + addon );
399 #pragma omp for schedule(dynamic,1) nowait
400 for(
int i=0UL; i<threads; ++i )
402 const size_t index( i*sizePerThread );
// Skip iterations whose chunk starts past the end of the vector.
404 if( index >= (~lhs).size() )
407 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
408 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
409 addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// Default (serial fallback) SMP subtraction assignment of a vector to a dense vector.
// Selected when no SMP-specialized overload applies; presumably forwards to the serial
// subAssign() kernel -- the body is not visible in this excerpt.
// NOTE(review): the template parameter list and the function body are truncated here;
// restore them from the original header before modifying this function.
//**********************************************************************************************
434 template<
typename VT1
438 inline void smpSubAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
//**********************************************************************************************
// OpenMP-based SMP subtraction assignment of a dense vector to a dense vector. Enabled via
// SFINAE only when both vector types are SMP-assignable. Structure mirrors the OpenMP
// smpAssign overload, but each chunk is combined with subAssign() instead of assign().
// NOTE(review): interior source lines (template parameter list, ET1/ET2 typedefs used
// below, assertions, braces) are missing from this excerpt; restore them before editing.
//**********************************************************************************************
466 template<
typename VT1
470 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
471 smpSubAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
// SIMD traits of the element type plus aligned/unaligned subvector view types into lhs.
484 typedef IntrinsicTrait<typename VT1::ElementType> IT;
485 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
486 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// Vectorization requires both operands vectorizable with identical element types; operand
// alignment is a run-time property.
488 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
489 const bool lhsAligned ( (~lhs).isAligned() );
490 const bool rhsAligned ( (~rhs).isAligned() );
492 #pragma omp parallel shared( lhs, rhs )
// ceil(size/threads) per thread, rounded up to a multiple of IT::size on the vectorized
// path so chunk boundaries preserve alignment (mask assumes IT::size is a power of two).
494 const int threads ( omp_get_num_threads() );
495 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
496 const size_t equalShare ( (~lhs).size() / threads + addon );
497 const size_t rest ( equalShare & ( IT::size - 1UL ) );
498 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
// One independent chunk per iteration; 'nowait' drops the implicit end-of-loop barrier.
500 #pragma omp for schedule(dynamic,1) nowait
501 for(
int i=0UL; i<threads; ++i )
503 const size_t index( i*sizePerThread );
// Rounded-up chunks can push trailing iterations past the end -- skip them.
505 if( index >= (~lhs).size() )
508 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment-based dispatch to the best available subvector views.
510 if( vectorizable && lhsAligned && rhsAligned ) {
511 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
512 subAssign( target, subvector<aligned>( ~rhs, index, size ) );
514 else if( vectorizable && lhsAligned ) {
515 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
516 subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
518 else if( vectorizable && rhsAligned ) {
519 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
520 subAssign( target, subvector<aligned>( ~rhs, index, size ) );
// Fallback: fully unaligned / non-vectorizable chunk (its 'else' line is missing here).
523 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
524 subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// OpenMP-based SMP subtraction assignment of a sparse vector to a dense vector. Enabled via
// SFINAE only when both vector types are SMP-assignable. No vectorized path for the sparse
// operand: only unaligned views, with a plain ceil(size/threads) chunking.
// NOTE(review): interior source lines are missing from this excerpt; restore them before
// editing the code itself.
//**********************************************************************************************
550 template<
typename VT1
554 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
555 smpSubAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
568 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
570 #pragma omp parallel shared( lhs, rhs )
572 const int threads ( omp_get_num_threads() );
573 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
574 const size_t sizePerThread( (~lhs).size() / threads + addon );
576 #pragma omp for schedule(dynamic,1) nowait
577 for(
int i=0UL; i<threads; ++i )
579 const size_t index( i*sizePerThread );
// Skip iterations whose chunk starts past the end of the vector.
581 if( index >= (~lhs).size() )
584 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
585 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
586 subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// Default (serial fallback) SMP multiplication assignment of a vector to a dense vector.
// Selected when no SMP-specialized overload applies; presumably forwards to the serial
// multAssign() kernel -- the body is not visible in this excerpt.
// NOTE(review): the template parameter list and the function body are truncated here;
// restore them from the original header before modifying this function.
//**********************************************************************************************
611 template<
typename VT1
615 inline void smpMultAssign( DenseVector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
//**********************************************************************************************
// OpenMP-based SMP multiplication assignment of a dense vector to a dense vector. Enabled
// via SFINAE only when both vector types are SMP-assignable. Structure mirrors the OpenMP
// smpAssign overload, but each chunk is combined with multAssign() instead of assign().
// NOTE(review): interior source lines (template parameter list, ET1/ET2 typedefs used
// below, assertions, braces) are missing from this excerpt; restore them before editing.
//**********************************************************************************************
643 template<
typename VT1
647 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
648 smpMultAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
// SIMD traits of the element type plus aligned/unaligned subvector view types into lhs.
661 typedef IntrinsicTrait<typename VT1::ElementType> IT;
662 typedef typename SubvectorExprTrait<VT1,aligned>::Type AlignedTarget;
663 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
// Vectorization requires both operands vectorizable with identical element types; operand
// alignment is a run-time property.
665 const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
666 const bool lhsAligned ( (~lhs).isAligned() );
667 const bool rhsAligned ( (~rhs).isAligned() );
669 #pragma omp parallel shared( lhs, rhs )
// ceil(size/threads) per thread, rounded up to a multiple of IT::size on the vectorized
// path so chunk boundaries preserve alignment (mask assumes IT::size is a power of two).
671 const int threads ( omp_get_num_threads() );
672 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
673 const size_t equalShare ( (~lhs).size() / threads + addon );
674 const size_t rest ( equalShare & ( IT::size - 1UL ) );
675 const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
// One independent chunk per iteration; 'nowait' drops the implicit end-of-loop barrier.
677 #pragma omp for schedule(dynamic,1) nowait
678 for(
int i=0UL; i<threads; ++i )
680 const size_t index( i*sizePerThread );
// Rounded-up chunks can push trailing iterations past the end -- skip them.
682 if( index >= (~lhs).size() )
685 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
// Alignment-based dispatch to the best available subvector views.
687 if( vectorizable && lhsAligned && rhsAligned ) {
688 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
689 multAssign( target, subvector<aligned>( ~rhs, index, size ) );
691 else if( vectorizable && lhsAligned ) {
692 AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
693 multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
695 else if( vectorizable && rhsAligned ) {
696 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
697 multAssign( target, subvector<aligned>( ~rhs, index, size ) );
// Fallback: fully unaligned / non-vectorizable chunk (its 'else' line is missing here).
700 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
701 multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
//**********************************************************************************************
// OpenMP-based SMP multiplication assignment of a sparse vector to a dense vector. Enabled
// via SFINAE only when both vector types are SMP-assignable. No vectorized path for the
// sparse operand: only unaligned views, with a plain ceil(size/threads) chunking.
// NOTE(review): interior source lines are missing from this excerpt; restore them before
// editing the code itself.
//**********************************************************************************************
727 template<
typename VT1
731 typename EnableIfTrue< VT1::smpAssignable && VT2::smpAssignable >::Type
732 smpMultAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
745 typedef typename SubvectorExprTrait<VT1,unaligned>::Type UnalignedTarget;
747 #pragma omp parallel shared( lhs, rhs )
749 const int threads ( omp_get_num_threads() );
750 const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
751 const size_t sizePerThread( (~lhs).size() / threads + addon );
753 #pragma omp for schedule(dynamic,1) nowait
754 for(
int i=0UL; i<threads; ++i )
756 const size_t index( i*sizePerThread );
// Skip iterations whose chunk starts past the end of the vector.
758 if( index >= (~lhs).size() )
761 const size_t size( min( sizePerThread, (~lhs).size() - index ) );
762 UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
763 multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
Header file for mathematical functions.
Header file for the SparseVector base class.
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
Header file for the complete DenseSubvector implementation.
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:178
Header file for the IsSame and IsStrictlySame type traits.
Header file for the DenseVector base class.
Header file for the intrinsic trait.
Header file for the complete SparseSubvector implementation.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Header file for the serial section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the EnableIf class template.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:211
Header file for run time assertion macros.
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP parallelization.
Definition: OpenMP.h:65
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracing is activated, the macro traces the entry and exit of the enclosing function.
Definition: FunctionTrace.h:157
System settings for the OpenMP parallelization.
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is created.
Definition: StaticAssert.h:143
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the FunctionTrace class.