35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 89 template<
typename VT1  // Type of the left-hand side dense vector
// Applies the operation 'op' chunk by chunk to the dense vector 'lhs' (target) and the dense
// vector 'rhs' (source). The index range of 'lhs' is cut into 'threads' chunks and 'op' is
// called once per chunk with a subvector view of 'lhs' and the matching subvector view of 'rhs'.
//
// lhs  The target dense vector; its size and alignment drive the partitioning.
// rhs  The source dense vector.
// op   Callable invoked as op( target, source ) on each pair of subvector views.
//
// NOTE(review): this listing appears to have lost lines (the template parameters TF1, VT2, TF2
// and OP used below are not declared in the visible parameter list, and no braces are visible).
// Confirm against the complete file before relying on the structure shown here.
94 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
// Element types of the two operands.
100 using ET1 = ElementType_t<VT1>;
101 using ET2 = ElementType_t<VT2>;
// SIMD is only considered when both vector types enable it and the element types are
// SIMD-combinable.
103 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD size reported by SIMDTrait for the element type of the left-hand side vector.
104 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::
size );
// Alignment of the complete operands; selects aligned vs. unaligned subvector views below.
106 const bool lhsAligned( (~lhs).isAligned() );
107 const bool rhsAligned( (~rhs).isAligned() );
// Number of chunks = value returned by omp_get_num_threads().
109 const int threads ( omp_get_num_threads() );
// addon is 1 if the size is not evenly divisible by 'threads', which makes equalShare the
// rounded-up quotient size/threads.
// NOTE(review): these expressions mix size_t with the int 'threads'.
110 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
111 const size_t equalShare ( (~lhs).
size() / threads + addon );
// Low bits of equalShare below SIMDSIZE; equals equalShare % SIMDSIZE assuming SIMDSIZE is a
// power of two (TODO confirm).
112 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero rest, the chunk size is raised to the next multiple of
// SIMDSIZE; otherwise the plain equal share is used.
113 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// OpenMP worksharing loop with one iteration per chunk: dynamic schedule with chunk size 1,
// and 'nowait' omits the barrier at the end of the loop.
// NOTE(review): the int loop counter is initialized from the unsigned long literal 0UL.
115 #pragma omp for schedule(dynamic,1) nowait 116 for(
int i=0UL; i<threads; ++i )
// Index of the first element of chunk i.
118 const size_t index( i*sizePerThread );
// Chunk starts at or beyond the end of the vector. The statement guarded by this check is not
// visible in this listing; presumably the chunk is skipped (TODO confirm). The check also
// keeps the unsigned subtraction 'size() - index' below from wrapping around.
120 if( index >= (~lhs).
size() )
// Number of elements in this chunk; the last chunk may be shorter than sizePerThread.
123 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views on both sides.
125 if( simdEnabled && lhsAligned && rhsAligned ) {
126 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
127 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
128 op( target, source );
// Case 2: SIMD enabled and only the left-hand side aligned -> aligned target, unaligned source.
130 else if( simdEnabled && lhsAligned ) {
131 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
132 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
133 op( target, source );
// Case 3: SIMD enabled and only the right-hand side aligned -> unaligned target, aligned source.
135 else if( simdEnabled && rhsAligned ) {
136 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
137 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
138 op( target, source );
// Remaining case (the introducing 'else' is not visible in this listing): unaligned views on
// both sides.
141 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
142 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
143 op( target, source );
168 template<
typename VT1  // Type of the left-hand side dense vector
// Applies the operation 'op' chunk by chunk to the dense vector 'lhs' (target) and the sparse
// vector 'rhs' (source). The index range of 'lhs' is cut into 'threads' chunks of equal size
// (no SIMD-related rounding is performed here) and 'op' is called once per chunk with
// unaligned subvector views of both operands.
//
// lhs  The target dense vector; its size drives the partitioning.
// rhs  The source sparse vector.
// op   Callable invoked as op( target, source ) on each pair of subvector views.
//
// NOTE(review): this listing appears to have lost lines (TF1, VT2, TF2 and OP are used but not
// declared in the visible template parameter list, and no braces are visible). Confirm against
// the complete file.
173 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// Number of chunks = value returned by omp_get_num_threads().
179 const int threads ( omp_get_num_threads() );
// addon is 1 if the size is not evenly divisible by 'threads', which makes sizePerThread the
// rounded-up quotient size/threads.
180 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
181 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// OpenMP worksharing loop with one iteration per chunk: dynamic schedule with chunk size 1,
// and 'nowait' omits the barrier at the end of the loop.
// NOTE(review): the int loop counter is initialized from the unsigned long literal 0UL.
183 #pragma omp for schedule(dynamic,1) nowait 184 for(
int i=0UL; i<threads; ++i )
// Index of the first element of chunk i.
186 const size_t index( i*sizePerThread );
// Chunk starts at or beyond the end of the vector. The guarded statement is not visible in
// this listing; presumably the chunk is skipped (TODO confirm).
188 if( index >= (~lhs).
size() )
// Number of elements in this chunk; the last chunk may be shorter than sizePerThread.
191 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Unaligned views of the chunk on both sides, then apply the operation.
192 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
193 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
194 op( target, source );
227 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpAssign() taken when VT1 is a dense vector and at least one of VT1/VT2 is
// not SMP-assignable (see the EnableIf_t condition). It forwards directly to assign().
//
// lhs  The target vector.
// rhs  The source vector.
231 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
232 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
238 assign( ~lhs, ~rhs );
262 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpAssign() taken when VT1 is a dense vector and both VT1 and VT2 are
// SMP-assignable (see the EnableIf_t condition).
//
// lhs  The target vector.
// rhs  The source vector.
//
// NOTE(review): two code paths are visible below, a plain assign() call and an OpenMP
// parallel region. The condition that selects between them is not visible in this listing;
// confirm against the complete file.
266 inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
267 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Path 1: direct call of assign() on the complete operands.
279 assign( ~lhs, ~rhs );
// Path 2: open an OpenMP parallel region that shares lhs and rhs and call openmpAssign();
// the lambda performs assign() on each pair of subvector views it is handed.
282 #pragma omp parallel shared( lhs, rhs ) 283 openmpAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
317 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpAddAssign() taken when VT1 is a dense vector and at least one of VT1/VT2 is
// not SMP-assignable (see the EnableIf_t condition). It forwards directly to addAssign().
//
// lhs  The target vector.
// rhs  The source vector to be added.
321 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
322 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
328 addAssign( ~lhs, ~rhs );
352 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpAddAssign() taken when VT1 is a dense vector and both VT1 and VT2 are
// SMP-assignable (see the EnableIf_t condition).
//
// lhs  The target vector.
// rhs  The source vector to be added.
//
// NOTE(review): two code paths are visible below, a plain addAssign() call and an OpenMP
// parallel region. The condition that selects between them is not visible in this listing;
// confirm against the complete file.
356 inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
357 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Path 1: direct call of addAssign() on the complete operands.
369 addAssign( ~lhs, ~rhs );
// Path 2: open an OpenMP parallel region that shares lhs and rhs and call openmpAssign();
// the lambda performs addAssign() on each pair of subvector views it is handed.
372 #pragma omp parallel shared( lhs, rhs ) 373 openmpAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
407 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpSubAssign() taken when VT1 is a dense vector and at least one of VT1/VT2 is
// not SMP-assignable (see the EnableIf_t condition). It forwards directly to subAssign().
//
// lhs  The target vector.
// rhs  The source vector to be subtracted.
411 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
412 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
418 subAssign( ~lhs, ~rhs );
442 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpSubAssign() taken when VT1 is a dense vector and both VT1 and VT2 are
// SMP-assignable (see the EnableIf_t condition).
//
// lhs  The target vector.
// rhs  The source vector to be subtracted.
//
// NOTE(review): two code paths are visible below, a plain subAssign() call and an OpenMP
// parallel region. The condition that selects between them is not visible in this listing;
// confirm against the complete file.
446 inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
447 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Path 1: direct call of subAssign() on the complete operands.
459 subAssign( ~lhs, ~rhs );
// Path 2: open an OpenMP parallel region that shares lhs and rhs and call openmpAssign();
// the lambda performs subAssign() on each pair of subvector views it is handed.
462 #pragma omp parallel shared( lhs, rhs ) 463 openmpAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
497 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpMultAssign() taken when VT1 is a dense vector and at least one of VT1/VT2 is
// not SMP-assignable (see the EnableIf_t condition). It forwards directly to multAssign().
//
// lhs  The target vector.
// rhs  The source vector to be multiplied.
501 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
502 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
508 multAssign( ~lhs, ~rhs );
532 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpMultAssign() taken when VT1 is a dense vector and both VT1 and VT2 are
// SMP-assignable (see the EnableIf_t condition).
//
// lhs  The target vector.
// rhs  The source vector to be multiplied.
//
// NOTE(review): two code paths are visible below, a plain multAssign() call and an OpenMP
// parallel region. The condition that selects between them is not visible in this listing;
// confirm against the complete file.
536 inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
537 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Path 1: direct call of multAssign() on the complete operands.
549 multAssign( ~lhs, ~rhs );
// Path 2: open an OpenMP parallel region that shares lhs and rhs and call openmpAssign();
// the lambda performs multAssign() on each pair of subvector views it is handed.
552 #pragma omp parallel shared( lhs, rhs ) 553 openmpAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
587 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpDivAssign() taken when VT1 is a dense vector and at least one of VT1/VT2 is
// not SMP-assignable (see the EnableIf_t condition). It forwards directly to divAssign().
//
// lhs  The target vector.
// rhs  The source vector acting as divisor.
591 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
592 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
598 divAssign( ~lhs, ~rhs );
622 template<
typename VT1  // Type of the left-hand side vector
// Overload of smpDivAssign() taken when VT1 is a dense vector and both VT1 and VT2 are
// SMP-assignable (see the EnableIf_t condition).
//
// lhs  The target vector.
// rhs  The source vector acting as divisor.
//
// NOTE(review): two code paths are visible below, a plain divAssign() call and an OpenMP
// parallel region. The condition that selects between them is not visible in this listing;
// confirm against the complete file.
626 inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
627 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Path 1: direct call of divAssign() on the complete operands.
639 divAssign( ~lhs, ~rhs );
// Path 2: open an OpenMP parallel region that shares lhs and rhs and call openmpAssign();
// the lambda performs divAssign() on each pair of subvector views it is handed.
642 #pragma omp parallel shared( lhs, rhs ) 643 openmpAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.