// Include guard of this header, followed by the overload of openmpAssign() that takes
// a dense vector on both sides.
//
// openmpAssign( lhs, rhs, op ) partitions the operands into contiguous chunks (one per
// thread reported by omp_get_num_threads()) and calls op( target, source ) on matching
// subvector views of lhs and rhs.
//   lhs - dense vector that is the target of the operation
//   rhs - dense vector that is the source of the operation
//   op  - callable invoked as op( target, source ) for every chunk
// NOTE(review): this listing appears to be an extraction of the original header; the
// remaining template parameters, the braces and some statements are not visible here.
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 94 template<
typename VT1
99 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
// Element types of the two operands.
105 using ET1 = ElementType_t<VT1>;
106 using ET2 = ElementType_t<VT2>;
// SIMD is only considered if both vector types advertise it and their element types
// are SIMD-combinable.
108 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from the SIMD trait of the left-hand side element type.
109 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::
size );
// Run time alignment of both operands; decides below between aligned and unaligned views.
111 const bool lhsAligned( (~lhs).isAligned() );
112 const bool rhsAligned( (~rhs).isAligned() );
// Ceiling division of the vector size by the thread count: addon is 1 whenever the
// size is not evenly divisible by the number of threads.
114 const int threads ( omp_get_num_threads() );
115 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
116 const size_t equalShare ( (~lhs).
size() / threads + addon );
// rest is equalShare modulo SIMDSIZE, computed with a bit mask (assumes SIMDSIZE is a
// power of two - TODO confirm). With SIMD enabled and a non-zero rest, the chunk size
// is rounded up to the next multiple of SIMDSIZE; presumably this keeps every chunk
// start on a SIMD boundary - verify against the aligned subvector requirements.
117 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
118 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// Work-sharing loop: iteration i processes chunk i. schedule(dynamic,1) hands out one
// iteration at a time and nowait omits the barrier at the end of the loop.
120 #pragma omp for schedule(dynamic,1) nowait 121 for(
int i=0UL; i<threads; ++i )
// Index of the first element of chunk i.
123 const size_t index( i*sizePerThread );
// Detects chunks that start at or beyond the end of the vector (possible because of
// the rounding above). The statement executed in that case is not visible in this listing.
125 if( index >= (~lhs).
size() )
// The last chunk may be shorter than sizePerThread; clamp to the remaining elements.
128 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views on both sides.
130 if( simdEnabled && lhsAligned && rhsAligned ) {
131 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
132 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
133 op( target, source );
// Case 2: SIMD enabled, only the left-hand side is aligned.
135 else if( simdEnabled && lhsAligned ) {
136 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
137 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
138 op( target, source );
// Case 3: SIMD enabled, only the right-hand side is aligned.
140 else if( simdEnabled && rhsAligned ) {
141 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
142 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
143 op( target, source );
// Remaining case: unaligned views on both sides. NOTE(review): the line introducing
// this branch (presumably an else) is not visible in this listing.
146 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
147 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
148 op( target, source );
// Overload of openmpAssign() for a sparse vector on the right-hand side.
//
// Partitions the operands into one contiguous chunk per thread reported by
// omp_get_num_threads() and calls op( target, source ) on matching unaligned subvector
// views of lhs and rhs. Unlike the dense/dense overload, no SIMD rounding of the chunk
// size and no alignment distinction is made here.
//   lhs - dense vector that is the target of the operation
//   rhs - sparse vector that is the source of the operation
//   op  - callable invoked as op( target, source ) for every chunk
// NOTE(review): the remaining template parameters, the braces and some statements are
// not visible in this listing.
173 template<
typename VT1
178 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// Ceiling division of the vector size by the thread count: addon is 1 whenever the
// size is not evenly divisible by the number of threads.
184 const int threads ( omp_get_num_threads() );
185 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
186 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// Work-sharing loop: iteration i processes chunk i. schedule(dynamic,1) hands out one
// iteration at a time and nowait omits the barrier at the end of the loop.
188 #pragma omp for schedule(dynamic,1) nowait 189 for(
int i=0UL; i<threads; ++i )
// Index of the first element of chunk i.
191 const size_t index( i*sizePerThread );
// Detects chunks that start at or beyond the end of the vector. The statement executed
// in that case is not visible in this listing.
193 if( index >= (~lhs).
size() )
// The last chunk may be shorter than sizePerThread; clamp to the remaining elements.
196 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both views are created unaligned and without checks (unchecked token).
197 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
198 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
199 op( target, source );
// Fallback overload of smpAssign(): selected when the left-hand side is a dense vector
// and at least one of the two vector types is not SMP-assignable. It forwards directly
// to assign() on the unwrapped operands, i.e. no OpenMP region is opened here.
// NOTE(review): the remaining template parameters and the braces are not visible in
// this listing.
232 template<
typename VT1
236 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
237 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
243 assign( ~lhs, ~rhs );
// OpenMP overload of smpAssign(): selected when the left-hand side is a dense vector
// and both vector types are SMP-assignable.
// Two paths are visible: a plain assign() call and an OpenMP parallel region that runs
// openmpAssign() with the Assign functor.
// NOTE(review): the condition that chooses between the two paths, the remaining
// template parameters and the braces are not visible in this listing - confirm against
// the original header.
267 template<
typename VT1
271 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
272 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path.
284 assign( ~lhs, ~rhs );
// Parallel path: lhs and rhs are shared between all threads of the region.
287 #pragma omp parallel shared( lhs, rhs ) 288 openmpAssign( ~lhs, ~rhs, Assign() );
// Fallback overload of smpAddAssign(): selected when the left-hand side is a dense
// vector and at least one of the two vector types is not SMP-assignable. It forwards
// directly to addAssign() on the unwrapped operands, i.e. no OpenMP region is opened here.
// NOTE(review): the remaining template parameters and the braces are not visible in
// this listing.
322 template<
typename VT1
326 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
327 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
333 addAssign( ~lhs, ~rhs );
// OpenMP overload of smpAddAssign(): selected when the left-hand side is a dense vector
// and both vector types are SMP-assignable.
// Two paths are visible: a plain addAssign() call and an OpenMP parallel region that
// runs openmpAssign() with the AddAssign functor.
// NOTE(review): the condition that chooses between the two paths, the remaining
// template parameters and the braces are not visible in this listing - confirm against
// the original header.
357 template<
typename VT1
361 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
362 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path.
374 addAssign( ~lhs, ~rhs );
// Parallel path: lhs and rhs are shared between all threads of the region.
377 #pragma omp parallel shared( lhs, rhs ) 378 openmpAssign( ~lhs, ~rhs, AddAssign() );
// Fallback overload of smpSubAssign(): selected when the left-hand side is a dense
// vector and at least one of the two vector types is not SMP-assignable. It forwards
// directly to subAssign() on the unwrapped operands, i.e. no OpenMP region is opened here.
// NOTE(review): the remaining template parameters and the braces are not visible in
// this listing.
412 template<
typename VT1
416 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
417 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
423 subAssign( ~lhs, ~rhs );
// OpenMP overload of smpSubAssign(): selected when the left-hand side is a dense vector
// and both vector types are SMP-assignable.
// Two paths are visible: a plain subAssign() call and an OpenMP parallel region that
// runs openmpAssign() with the SubAssign functor.
// NOTE(review): the condition that chooses between the two paths, the remaining
// template parameters and the braces are not visible in this listing - confirm against
// the original header.
447 template<
typename VT1
451 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
452 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path.
464 subAssign( ~lhs, ~rhs );
// Parallel path: lhs and rhs are shared between all threads of the region.
467 #pragma omp parallel shared( lhs, rhs ) 468 openmpAssign( ~lhs, ~rhs, SubAssign() );
// Fallback overload of smpMultAssign(): selected when the left-hand side is a dense
// vector and at least one of the two vector types is not SMP-assignable. It forwards
// directly to multAssign() on the unwrapped operands, i.e. no OpenMP region is opened here.
// NOTE(review): the remaining template parameters and the braces are not visible in
// this listing.
502 template<
typename VT1
506 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
507 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
513 multAssign( ~lhs, ~rhs );
// OpenMP overload of smpMultAssign(): selected when the left-hand side is a dense
// vector and both vector types are SMP-assignable.
// Two paths are visible: a plain multAssign() call and an OpenMP parallel region that
// runs openmpAssign() with the MultAssign functor.
// NOTE(review): the condition that chooses between the two paths, the remaining
// template parameters and the braces are not visible in this listing - confirm against
// the original header.
537 template<
typename VT1
541 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
542 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path.
554 multAssign( ~lhs, ~rhs );
// Parallel path: lhs and rhs are shared between all threads of the region.
557 #pragma omp parallel shared( lhs, rhs ) 558 openmpAssign( ~lhs, ~rhs, MultAssign() );
// Fallback overload of smpDivAssign(): selected when the left-hand side is a dense
// vector and at least one of the two vector types is not SMP-assignable. It forwards
// directly to divAssign() on the unwrapped operands, i.e. no OpenMP region is opened here.
// NOTE(review): the remaining template parameters and the braces are not visible in
// this listing.
592 template<
typename VT1
596 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
597 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
603 divAssign( ~lhs, ~rhs );
// OpenMP overload of smpDivAssign(): selected when the left-hand side is a dense vector
// and both vector types are SMP-assignable.
// Two paths are visible: a plain divAssign() call and an OpenMP parallel region that
// runs openmpAssign() with the DivAssign functor.
// NOTE(review): the condition that chooses between the two paths, the remaining
// template parameters and the braces are not visible in this listing - confirm against
// the original header.
627 template<
typename VT1
631 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
632 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path.
644 divAssign( ~lhs, ~rhs );
// Parallel path: lhs and rhs are shared between all threads of the region.
647 #pragma omp parallel shared( lhs, rhs ) 648 openmpAssign( ~lhs, ~rhs, DivAssign() );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
Header file for the Assign functor.
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the AddAssign functor.
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
Header file for the MultAssign functor.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:191
Header file for the DivAssign functor.
Constraint on the data type.
Header file for the function trace functionality.