// NOTE(review): the leading integers on these lines (35, 36, 97, 102, ...) look like line
// numbers carried over from an extracted source listing. The gaps between them suggest that
// lines (remaining template parameters, braces, some statements) are missing from this view;
// confirm against the original header before relying on it.
//
// openmpAssign (dense right-hand side): splits lhs into contiguous chunks and applies the
// functor 'op' to matching subvector views of lhs and rhs, one chunk per iteration of an
// OpenMP work-sharing loop.
//   lhs - target dense vector (chunk sizes are derived from its size())
//   rhs - source dense vector
//   op  - callable invoked as op( target, source ) for every chunk
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_ 97 template<
typename VT1
102 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs, OP op )
108 using ET1 = ElementType_<VT1>;
109 using ET2 = ElementType_<VT2>;
// SIMD is considered only if both vector types enable it and their element types are
// SIMD-combinable.
111 constexpr
bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
112 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::
size );
// Alignment of each operand, queried once and used below to pick aligned/unaligned views.
114 const bool lhsAligned( (~lhs).isAligned() );
115 const bool rhsAligned( (~rhs).isAligned() );
// Chunk size computation:
//   addon         - 1 if size() is not evenly divisible by the thread count, else 0
//   equalShare    - size()/threads plus addon
//   rest          - equalShare masked with (SIMDSIZE-1); this equals equalShare % SIMDSIZE
//                   only if SIMDSIZE is a power of two — TODO confirm
//   sizePerThread - equalShare raised to the next multiple of SIMDSIZE when SIMD is enabled
//                   and rest is non-zero, otherwise equalShare unchanged
117 const int threads ( omp_get_num_threads() );
118 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
119 const size_t equalShare ( (~lhs).
size() / threads + addon );
120 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
121 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
// One loop iteration per chunk. The loop is an OpenMP 'for' construct with a dynamic schedule
// of chunk size 1 and a 'nowait' clause.
// NOTE(review): 'i' is an int initialised from the unsigned long literal 0UL and is later
// multiplied with a size_t.
123 #pragma omp for schedule(dynamic,1) nowait 124 for(
int i=0UL; i<threads; ++i )
// First element handled by this iteration.
126 const size_t index( i*sizePerThread );
// Check for chunks that would start at or beyond the end of lhs.
// NOTE(review): the statement this 'if' guards is not visible in this listing.
128 if( index >= (~lhs).
size() )
// Number of elements in this chunk; the last chunk is clamped to the remaining elements.
131 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both operands aligned and SIMD enabled: aligned views on both sides.
133 if( simdEnabled && lhsAligned && rhsAligned ) {
134 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
135 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
136 op( target, source );
// Only lhs aligned: aligned target view, unaligned source view.
138 else if( simdEnabled && lhsAligned ) {
139 auto target( subvector<aligned>( ~lhs, index,
size,
unchecked ) );
140 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
141 op( target, source );
// Only rhs aligned: unaligned target view, aligned source view.
143 else if( simdEnabled && rhsAligned ) {
144 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
145 const auto source( subvector<aligned>( ~rhs, index,
size,
unchecked ) );
146 op( target, source );
// Remaining case: unaligned views on both sides.
// NOTE(review): presumably the final 'else' branch — its header is not visible in this listing.
149 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
150 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
151 op( target, source );
// openmpAssign (sparse right-hand side): splits lhs into contiguous chunks and applies the
// functor 'op' to matching unaligned subvector views of lhs and rhs, one chunk per iteration
// of an OpenMP work-sharing loop. No SIMD-based rounding of the chunk size is done here.
//   lhs - target dense vector (chunk sizes are derived from its size())
//   rhs - source sparse vector
//   op  - callable invoked as op( target, source ) for every chunk
// NOTE(review): the leading integers look like line numbers from an extracted listing; gaps
// between them suggest lines (template parameters, braces) are missing from this view.
176 template<
typename VT1
181 void openmpAssign( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs, OP op )
// Chunk size: size()/threads, plus 1 if size() is not evenly divisible by the thread count.
187 const int threads ( omp_get_num_threads() );
188 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
189 const size_t sizePerThread( (~lhs).
size() / threads + addon );
// One loop iteration per chunk; OpenMP 'for' construct with schedule(dynamic,1) and 'nowait'.
// NOTE(review): 'i' is an int initialised from the unsigned long literal 0UL.
191 #pragma omp for schedule(dynamic,1) nowait 192 for(
int i=0UL; i<threads; ++i )
// First element handled by this iteration.
194 const size_t index( i*sizePerThread );
// Check for chunks that would start at or beyond the end of lhs.
// NOTE(review): the statement this 'if' guards is not visible in this listing.
196 if( index >= (~lhs).
size() )
// Number of elements in this chunk; the last chunk is clamped to the remaining elements.
199 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
// Both views are created unaligned and without checks ('unchecked' token).
200 auto target( subvector<unaligned>( ~lhs, index,
size,
unchecked ) );
201 const auto source( subvector<unaligned>( ~rhs, index,
size,
unchecked ) );
202 op( target, source );
// smpAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector and
// at least one of VT1 / VT2 is not SMP-assignable. The only visible statement forwards to
// assign() on the operands obtained through operator~.
// NOTE(review): the leading integers look like line numbers from an extracted listing; the
// remaining template parameters and the function braces are not visible here.
235 template<
typename VT1
239 inline EnableIf_< And< IsDenseVector<VT1>
240 , Or< Not< IsSMPAssignable<VT1> >
241 , Not< IsSMPAssignable<VT2> > > > >
242 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
248 assign( ~lhs, ~rhs );
// smpAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector and
// both VT1 and VT2 are SMP-assignable. Two paths are visible: a plain assign() call, and an
// OpenMP parallel region (lhs and rhs shared) followed by openmpAssign() with an Assign
// functor.
// NOTE(review): the condition that selects between the two paths is not visible in this
// listing (the embedded line numbers jump from 277 to 289) — confirm against the original.
272 template<
typename VT1
276 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
277 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
289 assign( ~lhs, ~rhs );
292 #pragma omp parallel shared( lhs, rhs ) 293 openmpAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and at least one of VT1 / VT2 is not SMP-assignable. The only visible statement forwards
// to addAssign() on the operands obtained through operator~.
// NOTE(review): the leading integers look like line numbers from an extracted listing; the
// remaining template parameters and the function braces are not visible here.
327 template<
typename VT1
331 inline EnableIf_< And< IsDenseVector<VT1>
332 , Or< Not< IsSMPAssignable<VT1> >
333 , Not< IsSMPAssignable<VT2> > > > >
334 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
340 addAssign( ~lhs, ~rhs );
// smpAddAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and both VT1 and VT2 are SMP-assignable. Two paths are visible: a plain addAssign() call,
// and an OpenMP parallel region (lhs and rhs shared) followed by openmpAssign() with an
// AddAssign functor.
// NOTE(review): the condition that selects between the two paths is not visible in this
// listing (the embedded line numbers jump from 369 to 381) — confirm against the original.
364 template<
typename VT1
368 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
369 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
381 addAssign( ~lhs, ~rhs );
384 #pragma omp parallel shared( lhs, rhs ) 385 openmpAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and at least one of VT1 / VT2 is not SMP-assignable. The only visible statement forwards
// to subAssign() on the operands obtained through operator~.
// NOTE(review): the leading integers look like line numbers from an extracted listing; the
// remaining template parameters and the function braces are not visible here.
419 template<
typename VT1
423 inline EnableIf_< And< IsDenseVector<VT1>
424 , Or< Not< IsSMPAssignable<VT1> >
425 , Not< IsSMPAssignable<VT2> > > > >
426 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
432 subAssign( ~lhs, ~rhs );
// smpSubAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and both VT1 and VT2 are SMP-assignable. Two paths are visible: a plain subAssign() call,
// and an OpenMP parallel region (lhs and rhs shared) followed by openmpAssign() with a
// SubAssign functor.
// NOTE(review): the condition that selects between the two paths is not visible in this
// listing (the embedded line numbers jump from 461 to 473) — confirm against the original.
456 template<
typename VT1
460 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
461 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
473 subAssign( ~lhs, ~rhs );
476 #pragma omp parallel shared( lhs, rhs ) 477 openmpAssign( ~lhs, ~rhs, SubAssign() );
// smpMultAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and at least one of VT1 / VT2 is not SMP-assignable. The only visible statement forwards
// to multAssign() on the operands obtained through operator~.
// NOTE(review): the leading integers look like line numbers from an extracted listing; the
// remaining template parameters and the function braces are not visible here.
511 template<
typename VT1
515 inline EnableIf_< And< IsDenseVector<VT1>
516 , Or< Not< IsSMPAssignable<VT1> >
517 , Not< IsSMPAssignable<VT2> > > > >
518 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
524 multAssign( ~lhs, ~rhs );
// smpMultAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and both VT1 and VT2 are SMP-assignable. Two paths are visible: a plain multAssign() call,
// and an OpenMP parallel region (lhs and rhs shared) followed by openmpAssign() with a
// MultAssign functor.
// NOTE(review): the condition that selects between the two paths is not visible in this
// listing (the embedded line numbers jump from 553 to 565) — confirm against the original.
548 template<
typename VT1
552 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
553 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
565 multAssign( ~lhs, ~rhs );
568 #pragma omp parallel shared( lhs, rhs ) 569 openmpAssign( ~lhs, ~rhs, MultAssign() );
// smpDivAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and at least one of VT1 / VT2 is not SMP-assignable. The only visible statement forwards
// to divAssign() on the operands obtained through operator~.
// NOTE(review): the leading integers look like line numbers from an extracted listing; the
// remaining template parameters and the function braces are not visible here.
603 template<
typename VT1
607 inline EnableIf_< And< IsDenseVector<VT1>
608 , Or< Not< IsSMPAssignable<VT1> >
609 , Not< IsSMPAssignable<VT2> > > > >
610 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
616 divAssign( ~lhs, ~rhs );
// smpDivAssign overload constrained via EnableIf_ to the case where VT1 is a dense vector
// and both VT1 and VT2 are SMP-assignable. Two paths are visible: a plain divAssign() call,
// and an OpenMP parallel region (lhs and rhs shared) followed by openmpAssign() with a
// DivAssign functor.
// NOTE(review): the condition that selects between the two paths is not visible in this
// listing (the embedded line numbers jump from 645 to 657) — confirm against the original.
640 template<
typename VT1
644 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
645 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
657 divAssign( ~lhs, ~rhs );
660 #pragma omp parallel shared( lhs, rhs ) 661 openmpAssign( ~lhs, ~rhs, DivAssign() );
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
Header file for the Assign functor.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the AddAssign functor.
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the MultAssign functor.
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the DivAssign functor.
Constraint on the data type.
Header file for the function trace functionality.