#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
#define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType  ET1;
   typedef typename VT2::ElementType  ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
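   // Worked example of the chunk-size computation (explanatory comment, not part of
   // the original source): each thread receives a contiguous chunk of roughly
   // ceil( size / threads ) elements. If the operation is vectorizable, the chunk
   // size is rounded up to the next multiple of the intrinsic width IT::size, so
   // every chunk starts on a vectorizable boundary. E.g. with 4 threads, a vector
   // of size 103, and IT::size == 4: addon = 1, equalShare = 103/4 + 1 = 26,
   // rest = 26 & 3 = 2, and sizePerThread = 26 - 2 + 4 = 28; threads 0-2 process
   // 28 elements each and thread 3 processes the remaining 19.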
#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
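// Minimal usage sketch (explanatory comment, not part of the original source;
// assumes an SMP-enabled OpenMP build of Blaze and would compile inside a main()
// with <blaze/Math.h> included):
//
//    blaze::DynamicVector<double> a( 100000UL ), b( 100000UL, 2.0 );
//    a = b;  // the assignment is routed through smpAssign(), which opens a
//            // parallel region in which each thread runs smpAssign_backend()
//            // on its own chunk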
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      assign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
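// Note (explanatory comment, not part of the original source): a sparse right-hand
// side cannot be processed with intrinsics, so this overload skips the alignment
// analysis of the dense/dense backend, always assigns through unaligned subvectors,
// and does not pad sizePerThread to a multiple of IT::size.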
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   assign( ~lhs, ~rhs );
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT1::ResultType );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT2::ResultType );

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         assign( ~lhs, ~rhs );
      }
      else {
#pragma omp parallel shared( lhs, rhs )
         smpAssign_backend( ~lhs, ~rhs );
      }
   }
}
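// Dispatch note (explanatory comment, not part of the original source): the two
// smpAssign() overloads are mutually exclusive via EnableIf. If either operand type
// is not SMP-assignable, the first overload performs a plain serial assign();
// otherwise the overload above distributes the work. Illustrative sketch, assuming
// StaticVector is not SMP-assignable (as for small stack-allocated types):
//
//    blaze::DynamicVector<double> x( 1000UL ), y( 1000UL, 1.0 );
//    blaze::smpAssign( x, y );        // parallel overload
//
//    blaze::StaticVector<double,3UL> u, v;
//    blaze::smpAssign( u, v );        // serial fallback overload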
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType  ET1;
   typedef typename VT2::ElementType  ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   addAssign( ~lhs, ~rhs );
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT1::ResultType );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT2::ResultType );

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         addAssign( ~lhs, ~rhs );
      }
      else {
#pragma omp parallel shared( lhs, rhs )
         smpAddAssign_backend( ~lhs, ~rhs );
      }
   }
}
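// Usage sketch (explanatory comment, not part of the original source; assumes an
// SMP-enabled OpenMP build of Blaze):
//
//    blaze::DynamicVector<double> a( 100000UL, 1.0 ), b( 100000UL, 2.0 );
//    a += b;  // compound addition is routed through smpAddAssign()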
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType  ET1;
   typedef typename VT2::ElementType  ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   subAssign( ~lhs, ~rhs );
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT1::ResultType );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT2::ResultType );

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         subAssign( ~lhs, ~rhs );
      }
      else {
#pragma omp parallel shared( lhs, rhs )
         smpSubAssign_backend( ~lhs, ~rhs );
      }
   }
}
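// Note (explanatory comment, not part of the original source): even the SMP-enabled
// overload falls back to a serial subAssign() when a serial section is active or the
// right-hand side reports that it cannot be SMP-assigned. Illustrative sketch using
// Blaze's BLAZE_SERIAL_SECTION:
//
//    BLAZE_SERIAL_SECTION {
//       a -= b;  // isSerialSectionActive() is true; runs serially on this thread
//    }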
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   typedef typename VT1::ElementType  ET1;
   typedef typename VT2::ElementType  ET2;
   typedef IntrinsicTrait<typename VT1::ElementType>         IT;
   typedef typename SubvectorExprTrait<VT1,aligned>::Type    AlignedTarget;
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const bool vectorizable( VT1::vectorizable && VT2::vectorizable && IsSame<ET1,ET2>::value );
   const bool lhsAligned  ( (~lhs).isAligned() );
   const bool rhsAligned  ( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( IT::size - 1UL ) );
   const size_t sizePerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( vectorizable && lhsAligned && rhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && lhsAligned ) {
         AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( vectorizable && rhsAligned ) {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   typedef typename SubvectorExprTrait<VT1,unaligned>::Type  UnalignedTarget;

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

#pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
      multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , Or< Not< IsSMPAssignable<VT1> >
                                 , Not< IsSMPAssignable<VT2> > > > >::Type
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
   multAssign( ~lhs, ~rhs );
}
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline typename EnableIf< And< IsDenseVector<VT1>
                             , IsSMPAssignable<VT1>
                             , IsSMPAssignable<VT2> > >::Type
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT1::ResultType );
   BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( typename VT2::ResultType );

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   BLAZE_PARALLEL_SECTION
   {
      if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
         multAssign( ~lhs, ~rhs );
      }
      else {
#pragma omp parallel shared( lhs, rhs )
         smpMultAssign_backend( ~lhs, ~rhs );
      }
   }
}
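// Usage sketch (explanatory comment, not part of the original source; assumes an
// SMP-enabled OpenMP build of Blaze):
//
//    blaze::DynamicVector<double> a( 100000UL, 3.0 ), b( 100000UL, 2.0 );
//    a *= b;  // componentwise multiplication is routed through smpMultAssign()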