// NOTE: this text comes from a rendered source listing. The leading numbers (35, 36, 100, ...)
// are the listing's own line numbers; lines missing from the sequence (braces, the remaining
// template parameters, some statements) are not part of this excerpt.
//
// openmpAssign (dense right-hand side): applies the functor `op` block by block to matching
// submatrix views of `lhs` and `rhs`. The blocks form a grid of threadmap.first x
// threadmap.second tiles; each iteration of the OpenMP work-sharing loop below handles one tile.
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_ 100 template<
typename MT1
105 void openmpAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
111 using ET1 = ElementType_<MT1>;
112 using ET2 = ElementType_<MT2>;
// SIMD handling is enabled only if both matrix types advertise it and their element types
// are SIMD-combinable.
114 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
// SIMDTrait<...>::size for the left-hand side element type (presumably the number of elements
// per SIMD vector -- confirm against SIMDTrait).
115 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
// The alignment of each operand decides which submatrix view flavor is created in the loop.
117 const bool lhsAligned( (~lhs).isAligned() );
118 const bool rhsAligned( (~rhs).isAligned() );
// Size of the current OpenMP thread team; kept as int to match the signed loop index below.
120 const int threads( omp_get_num_threads() );
// threadmap.first divides the rows and threadmap.second divides the columns (see below).
121 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: ceiling of rows / threadmap.first ...
123 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
124 const size_t equalShare1( (~rhs).
rows() / threadmap.first + addon1 );
// ... and, when SIMD is enabled, rounded up to the next multiple of SIMDSIZE. The bit mask
// acts as a modulo only if SIMDSIZE is a power of two (presumably always the case -- confirm).
125 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
126 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same scheme, based on threadmap.second.
128 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
129 const size_t equalShare2( (~rhs).
columns() / threadmap.second + addon2 );
130 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
131 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Work-sharing loop without its own parallel directive: it relies on a parallel region opened
// by the caller. One tile per iteration, handed out dynamically, no barrier at the end.
133 #pragma omp for schedule(dynamic,1) nowait 134 for(
int i=0; i<threads; ++i )
// Top-left corner of tile i: tile index -> (grid row, grid column) -> element offsets.
136 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
137 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height, clamped to the rows that remain below `row`.
// NOTE(review): `rows() - row` is unsigned and relies on row < rows(); no guard is visible
// in this excerpt (listing lines 139-140 are absent) -- confirm one exists.
// NOTE(review): `n` (used below) is not defined in the visible lines; presumably the
// analogous clamp for columns on the absent listing line 143 -- confirm.
142 const size_t m(
min( rowsPerThread, (~rhs).
rows() -
row ) );
// Both operands aligned: aligned views on both sides.
145 if( simdEnabled && lhsAligned && rhsAligned ) {
146 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
147 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
148 op( target, source );
// Only the left-hand side is aligned.
150 else if( simdEnabled && lhsAligned ) {
151 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
152 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
153 op( target, source );
// Only the right-hand side is aligned.
155 else if( simdEnabled && rhsAligned ) {
156 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
157 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
158 op( target, source );
// Remaining case (the introducing `else` is not visible in this excerpt): unaligned views
// on both sides.
161 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
162 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
163 op( target, source );
// openmpAssign (sparse right-hand side): applies the functor `op` block by block to matching
// submatrix views of `lhs` and `rhs`. Unlike a SIMD-aware partitioning, the tile sizes here are
// plain ceiling divisions and all views are unaligned.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces, remaining template parameters and some statements are absent.
188 template<
typename MT1
193 void openmpAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Size of the current OpenMP thread team.
// NOTE(review): stored as size_t, so the loop index below is unsigned; OpenMP 2.x compilers
// accept only signed loop variables in `omp for` -- confirm the supported compilers.
199 const size_t threads( omp_get_num_threads() );
// threadmap.first divides the rows and threadmap.second divides the columns (see below).
200 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: ceiling of rows / threadmap.first.
202 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
203 const size_t rowsPerThread( (~rhs).
rows() / threadmap.first + addon1 );
// Columns per tile: ceiling of columns / threadmap.second.
205 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
206 const size_t colsPerThread( (~rhs).
columns() / threadmap.second + addon2 );
// Work-sharing loop without its own parallel directive: it relies on a parallel region opened
// by the caller. One tile per iteration, handed out dynamically, no barrier at the end.
208 #pragma omp for schedule(dynamic,1) nowait 209 for(
size_t i=0; i<threads; ++i )
// Top-left corner of tile i: tile index -> (grid row, grid column) -> element offsets.
211 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
212 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height, clamped to the rows of `lhs` that remain below `row`.
// NOTE(review): `rows() - row` is unsigned and relies on row < rows(); no guard is visible
// in this excerpt -- confirm one exists.
// NOTE(review): `n` (used below) is not defined in the visible lines; presumably the
// analogous clamp for columns -- confirm.
217 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Always unaligned views for both operands in this overload.
220 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
221 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
222 op( target, source );
// smpAssign, fallback overload: selected when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. It forwards to the plain assign(); no OpenMP construct
// appears in the visible lines.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces and remaining template parameters are absent.
255 template<
typename MT1
259 inline EnableIf_< And< IsDenseMatrix<MT1>
260 , Or< Not< IsSMPAssignable<MT1> >
261 , Not< IsSMPAssignable<MT2> > > > >
262 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
269 assign( ~lhs, ~rhs );
// smpAssign, parallel overload: selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces, remaining template parameters and some statements are absent.
293 template<
typename MT1
297 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
298 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Serial path. The condition that chooses between this call and the parallel region below
// is not visible in this excerpt.
311 assign( ~lhs, ~rhs );
// Parallel path: opens an OpenMP parallel region in which openmpAssign distributes the work,
// with an Assign object as the per-block operation.
314 #pragma omp parallel shared( lhs, rhs ) 315 openmpAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign, fallback overload: selected when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. It forwards to the plain addAssign(); no OpenMP construct
// appears in the visible lines.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces and remaining template parameters are absent.
349 template<
typename MT1
353 inline EnableIf_< And< IsDenseMatrix<MT1>
354 , Or< Not< IsSMPAssignable<MT1> >
355 , Not< IsSMPAssignable<MT2> > > > >
356 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
363 addAssign( ~lhs, ~rhs );
// smpAddAssign, parallel overload: selected when MT1 is a dense matrix and both MT1 and MT2
// are SMP-assignable.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces, remaining template parameters and some statements are absent.
387 template<
typename MT1
391 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
392 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Serial path. The condition that chooses between this call and the parallel region below
// is not visible in this excerpt.
405 addAssign( ~lhs, ~rhs );
// Parallel path: opens an OpenMP parallel region in which openmpAssign distributes the work,
// with an AddAssign object as the per-block operation.
408 #pragma omp parallel shared( lhs, rhs ) 409 openmpAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign, fallback overload: selected when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. It forwards to the plain subAssign(); no OpenMP construct
// appears in the visible lines.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces and remaining template parameters are absent.
443 template<
typename MT1
447 inline EnableIf_< And< IsDenseMatrix<MT1>
448 , Or< Not< IsSMPAssignable<MT1> >
449 , Not< IsSMPAssignable<MT2> > > > >
450 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
457 subAssign( ~lhs, ~rhs );
// smpSubAssign, parallel overload: selected when MT1 is a dense matrix and both MT1 and MT2
// are SMP-assignable.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces, remaining template parameters and some statements are absent.
481 template<
typename MT1
485 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
486 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Serial path. The condition that chooses between this call and the parallel region below
// is not visible in this excerpt.
499 subAssign( ~lhs, ~rhs );
// Parallel path: opens an OpenMP parallel region in which openmpAssign distributes the work,
// with a SubAssign object as the per-block operation.
502 #pragma omp parallel shared( lhs, rhs ) 503 openmpAssign( ~lhs, ~rhs, SubAssign() );
// smpSchurAssign, fallback overload: selected when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. It forwards to the plain schurAssign(); no OpenMP construct
// appears in the visible lines.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces and remaining template parameters are absent.
537 template<
typename MT1
541 inline EnableIf_< And< IsDenseMatrix<MT1>
542 , Or< Not< IsSMPAssignable<MT1> >
543 , Not< IsSMPAssignable<MT2> > > > >
544 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
551 schurAssign( ~lhs, ~rhs );
// smpSchurAssign, parallel overload: selected when MT1 is a dense matrix and both MT1 and MT2
// are SMP-assignable.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces, remaining template parameters and some statements are absent.
575 template<
typename MT1
579 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
580 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Serial path. The condition that chooses between this call and the parallel region below
// is not visible in this excerpt.
593 schurAssign( ~lhs, ~rhs );
// Parallel path: opens an OpenMP parallel region in which openmpAssign distributes the work,
// with a SchurAssign object as the per-block operation.
596 #pragma omp parallel shared( lhs, rhs ) 597 openmpAssign( ~lhs, ~rhs, SchurAssign() );
// smpMultAssign: enabled for every dense matrix type MT1, with no SMP-assignability condition
// in the visible signature. It forwards to the plain multAssign(); no OpenMP construct
// appears in the visible lines.
// NOTE: excerpt of a rendered source listing; the leading numbers are the listing's own line
// numbers, and braces and remaining template parameters are absent.
629 template<
typename MT1
633 inline EnableIf_< IsDenseMatrix<MT1> >
634 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
641 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:131
Header file for the generic min algorithm.
Header file for the alignment flag values.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
Header file for the Assign functor.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the AddAssign functor.
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:506
Header file for the serial section implementation.
Header file for the SchurAssign functor.
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:490
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the MultAssign functor.
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.