35#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36#define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
// OpenMP back end for the (compound) assignment of a dense vector to a dense vector.
//
// The target vector is cut into at most `threads` contiguous chunks. For every chunk the callable
// `op` is invoked as op( target, source ), where `target` and `source` are subvector views of
// `lhs` and `rhs` covering the same index range.
//
//   lhs : the dense vector that is written to.
//   rhs : the dense vector that is read from.
//   op  : the operation applied to each pair of subvector views.
//
// The `omp for` directive below is a worksharing construct without a `parallel` directive of its
// own; the callers visible in this file invoke the function from inside `#pragma omp parallel`.
// NOTE(review): this listing does not show the template parameter list, the function braces, or
// some statements of the body -- consult the original header before changing the code.
95void openmpAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
101 using ET1 = ElementType_t<VT1>;
102 using ET2 = ElementType_t<VT2>;
// Compile-time switch: both vector types report SIMD support and their element types can be
// combined in SIMD operations.
104 constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD size as reported by SIMDTrait for the element type of the left-hand side.
105 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >
::size );
// Run-time alignment state of both operands; selects aligned or unaligned subvector views below.
107 const bool lhsAligned( (*lhs).isAligned() );
108 const bool rhsAligned( (*rhs).isAligned() );
// Number of threads in the current OpenMP team.
110 const int threads ( omp_get_num_threads() );
// `addon` is 1 when the size is not a multiple of `threads`, so `equalShare` is the size
// divided by the thread count, rounded up.
111 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
112 const size_t equalShare ( (*lhs).size() / threads + addon );
// Part of `equalShare` that exceeds a multiple of SIMDSIZE, computed with a bit mask.
// NOTE(review): the mask only equals a modulo if SIMDSIZE is a power of two -- confirm.
113 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero `rest`, the chunk size is rounded up to the next multiple of
// SIMDSIZE; otherwise the plain equal share is used.
114 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One loop iteration per chunk. schedule(dynamic,1) hands out single iterations to the threads on
// demand; nowait removes the implicit barrier at the end of the loop.
116#pragma omp for schedule(dynamic,1) nowait
117 for(
int i=0UL; i<threads; ++i )
// First element of chunk i.
119 const size_t index( i*sizePerThread );
// Because of the rounding above, trailing chunks may start at or beyond the end of the vector.
// NOTE(review): the statement guarded by this condition is missing from this listing
// (presumably it skips the iteration) -- confirm against the original header.
121 if( index >= (*lhs).size() )
// Length of chunk i; the last non-empty chunk is truncated at the end of the vector.
124 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views on both sides.
126 if( simdEnabled && lhsAligned && rhsAligned ) {
127 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
128 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
129 op( target, source );
// Case 2: SIMD enabled and only the left-hand side aligned.
131 else if( simdEnabled && lhsAligned ) {
132 auto target( subvector<aligned>( *lhs, index,
size,
unchecked ) );
133 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
134 op( target, source );
// Case 3: SIMD enabled and only the right-hand side aligned.
136 else if( simdEnabled && rhsAligned ) {
137 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
138 const auto source( subvector<aligned>( *rhs, index,
size,
unchecked ) );
139 op( target, source );
// Remaining case: unaligned views on both sides.
// NOTE(review): the `else` that introduces this branch is not visible in this listing.
142 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
143 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
144 op( target, source );
// OpenMP back end for the (compound) assignment of a sparse vector to a dense vector.
//
// The target vector is cut into at most `threads` contiguous chunks of equal size (the last
// non-empty one may be shorter). For every chunk `op` is invoked as op( target, source ) on
// unaligned subvector views of `lhs` and `rhs` covering the same index range.
//
//   lhs : the dense vector that is written to.
//   rhs : the sparse vector that is read from.
//   op  : the operation applied to each pair of subvector views.
//
// The `omp for` directive below has no `parallel` directive of its own.
// NOTE(review): presumably this overload is called from inside an OpenMP parallel region, like
// the calls visible in the smp*Assign functions of this file -- confirm. The listing also omits
// most of the template parameter list, the braces and some statements of the body.
169template<
typename VT1
174void openmpAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// Number of threads in the current OpenMP team.
180 const int threads ( omp_get_num_threads() );
// `addon` is 1 when the size is not a multiple of `threads`, so `sizePerThread` is the size
// divided by the thread count, rounded up. No SIMD rounding is applied in this overload.
181 const size_t addon ( ( ( (*lhs).size() % threads ) != 0UL )? 1UL : 0UL );
182 const size_t sizePerThread( (*lhs).size() / threads + addon );
// One loop iteration per chunk. schedule(dynamic,1) hands out single iterations to the threads on
// demand; nowait removes the implicit barrier at the end of the loop.
184#pragma omp for schedule(dynamic,1) nowait
185 for(
int i=0UL; i<threads; ++i )
// First element of chunk i.
187 const size_t index( i*sizePerThread );
// Because of the rounding above, trailing chunks may start at or beyond the end of the vector.
// NOTE(review): the statement guarded by this condition is missing from this listing
// (presumably it skips the iteration) -- confirm against the original header.
189 if( index >= (*lhs).size() )
// Length of chunk i; the last non-empty chunk is truncated at the end of the vector.
192 const size_t size(
min( sizePerThread, (*lhs).size() - index ) );
// Unaligned views are used for both operands in this overload.
193 auto target( subvector<unaligned>( *lhs, index,
size,
unchecked ) );
194 const auto source( subvector<unaligned>( *rhs, index,
size,
unchecked ) );
195 op( target, source );
// SMP assignment of a vector to a dense vector: non-parallel fallback overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and at least one of VT1 and VT2 is not SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be assigned.
//
// NOTE(review): the remaining template parameters and the function braces are not visible in
// this listing.
228template<
typename VT1
232inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
233 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
// Forward to the plain assign() of the two operands; no OpenMP construct is used here.
239 assign( *lhs, *rhs );
// SMP assignment of a vector to a dense vector: OpenMP overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and both VT1 and VT2 are SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be assigned.
//
// Two code paths are visible: a direct assign() call and an OpenMP parallel region.
// NOTE(review): the condition that selects between the two paths, the remaining template
// parameters and the braces are missing from this listing -- consult the original header.
263template<
typename VT1
267inline auto smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
268 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path: plain assignment of the complete vector.
280 assign( *lhs, *rhs );
// Parallel path: a parallel region is opened with lhs and rhs shared between the threads, and
// openmpAssign() is called inside it. The lambda applies assign() to each pair of subvector
// views it is handed.
283#pragma omp parallel shared( lhs, rhs )
284 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// SMP addition assignment of a vector to a dense vector: non-parallel fallback overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and at least one of VT1 and VT2 is not SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be added.
//
// NOTE(review): the remaining template parameters and the function braces are not visible in
// this listing.
318template<
typename VT1
322inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
323 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
// Forward to the plain addAssign() of the two operands; no OpenMP construct is used here.
329 addAssign( *lhs, *rhs );
// SMP addition assignment of a vector to a dense vector: OpenMP overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and both VT1 and VT2 are SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be added.
//
// Two code paths are visible: a direct addAssign() call and an OpenMP parallel region.
// NOTE(review): the condition that selects between the two paths, the remaining template
// parameters and the braces are missing from this listing -- consult the original header.
353template<
typename VT1
357inline auto smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
358 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path: plain addition assignment of the complete vector.
370 addAssign( *lhs, *rhs );
// Parallel path: a parallel region is opened with lhs and rhs shared between the threads, and
// openmpAssign() is called inside it. The lambda applies addAssign() to each pair of subvector
// views it is handed.
373#pragma omp parallel shared( lhs, rhs )
374 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// SMP subtraction assignment of a vector to a dense vector: non-parallel fallback overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and at least one of VT1 and VT2 is not SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be subtracted.
//
// NOTE(review): the remaining template parameters and the function braces are not visible in
// this listing.
408template<
typename VT1
412inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
413 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
// Forward to the plain subAssign() of the two operands; no OpenMP construct is used here.
419 subAssign( *lhs, *rhs );
// SMP subtraction assignment of a vector to a dense vector: OpenMP overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and both VT1 and VT2 are SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be subtracted.
//
// Two code paths are visible: a direct subAssign() call and an OpenMP parallel region.
// NOTE(review): the condition that selects between the two paths, the remaining template
// parameters and the braces are missing from this listing -- consult the original header.
443template<
typename VT1
447inline auto smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
448 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path: plain subtraction assignment of the complete vector.
460 subAssign( *lhs, *rhs );
// Parallel path: a parallel region is opened with lhs and rhs shared between the threads, and
// openmpAssign() is called inside it. The lambda applies subAssign() to each pair of subvector
// views it is handed.
463#pragma omp parallel shared( lhs, rhs )
464 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// SMP multiplication assignment of a vector to a dense vector: non-parallel fallback overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and at least one of VT1 and VT2 is not SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be multiplied.
//
// NOTE(review): the remaining template parameters and the function braces are not visible in
// this listing.
498template<
typename VT1
502inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
503 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
// Forward to the plain multAssign() of the two operands; no OpenMP construct is used here.
509 multAssign( *lhs, *rhs );
// SMP multiplication assignment of a vector to a dense vector: OpenMP overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and both VT1 and VT2 are SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the vector to be multiplied.
//
// Two code paths are visible: a direct multAssign() call and an OpenMP parallel region.
// NOTE(review): the condition that selects between the two paths, the remaining template
// parameters and the braces are missing from this listing -- consult the original header.
533template<
typename VT1
537inline auto smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
538 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path: plain multiplication assignment of the complete vector.
550 multAssign( *lhs, *rhs );
// Parallel path: a parallel region is opened with lhs and rhs shared between the threads, and
// openmpAssign() is called inside it. The lambda applies multAssign() to each pair of subvector
// views it is handed.
553#pragma omp parallel shared( lhs, rhs )
554 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ multAssign( a, b ); } );
// SMP division assignment of a vector to a dense vector: non-parallel fallback overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and at least one of VT1 and VT2 is not SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the divisor vector.
//
// NOTE(review): the remaining template parameters and the function braces are not visible in
// this listing.
588template<
typename VT1
592inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
593 -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
// Forward to the plain divAssign() of the two operands; no OpenMP construct is used here.
599 divAssign( *lhs, *rhs );
// SMP division assignment of a vector to a dense vector: OpenMP overload.
//
// The trailing EnableIf_t return type restricts this overload to the case where VT1 is a dense
// vector and both VT1 and VT2 are SMP-assignable.
//
//   lhs : the target vector.
//   rhs : the divisor vector.
//
// Two code paths are visible: a direct divAssign() call and an OpenMP parallel region.
// NOTE(review): the condition that selects between the two paths, the remaining template
// parameters and the braces are missing from this listing -- consult the original header.
623template<
typename VT1
627inline auto smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
628 -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
// Non-parallel path: plain division assignment of the complete vector.
640 divAssign( *lhs, *rhs );
// Parallel path: a parallel region is opened with lhs and rhs shared between the threads, and
// openmpAssign() is called inside it. The lambda applies divAssign() to each pair of subvector
// views it is handed.
643#pragma omp parallel shared( lhs, rhs )
644 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ divAssign( a, b ); } );
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseVector type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Deactivation of problematic macros.
Header file for the parallel section implementation.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the DenseVector base class.
Header file for the SparseVector base class.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:221
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP assignment of a vector to a dense vector.
Definition: DenseVector.h:105
auto smpAddAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP addition assignment of a vector to a dense vector.
Definition: DenseVector.h:134
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.
Definition: ParallelSection.h:254
auto smpSubAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP subtraction assignment of a vector to a dense vector.
Definition: DenseVector.h:163
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization.
Definition: SMP.h:68
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Subvector view.