35#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36#define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
// Block-wise application of `op` to a dense left-hand side and a dense right-hand side.
//
// lhs : target dense matrix; submatrix views of it are passed to `op` as first argument.
// rhs : source dense matrix; matching submatrix views are passed to `op` as second argument.
// op  : callable invoked as op( target, source ) once per block.
//
// NOTE(review): this listing omits lines of the original file (braces, the template header,
// and some statements are not shown); the comments below describe only what is visible.
97void openmpAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
// Element types of the two operands, used for the SIMD compatibility check below.
103 using ET1 = ElementType_t<MT1>;
104 using ET2 = ElementType_t<MT2>;
// SIMD is considered usable only if both matrix types enable it and the element types combine.
106 constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from the left-hand side element type.
107 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >
::size );
// Alignment of each operand decides which submatrix flavor (aligned/unaligned) is created later.
109 const bool lhsAligned( (*lhs).isAligned() );
110 const bool rhsAligned( (*rhs).isAligned() );
// Number of threads reported by OpenMP and the thread grid derived from it:
// threadmap.first divides the rows, threadmap.second divides the columns (see uses below).
112 const int threads( omp_get_num_threads() );
113 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
// Rows per block: ceiling of rows / threadmap.first ...
115 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
116 const size_t equalShare1( (*rhs).rows() / threadmap.first + addon1 );
// ... then, with SIMD enabled, bumped to the next multiple of SIMDSIZE when 'rest1' is non-zero.
// The mask '& ( SIMDSIZE - 1UL )' equals a modulo only if SIMDSIZE is a power of two
// (assumption - TODO confirm against SIMDTrait).
117 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
118 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per block: same scheme, using threadmap.second.
120 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
121 const size_t equalShare2( (*rhs).columns() / threadmap.second + addon2 );
122 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
123 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// One loop iteration per block index; the OpenMP worksharing loop hands them out one at a
// time (dynamic,1) and 'nowait' drops the barrier at the end of the loop.
125#pragma omp for schedule(dynamic,1) nowait
126 for(
int i=0; i<threads; ++i )
// Top-left corner of block i within the threadmap grid (row-major numbering of blocks).
128 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
129 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Detects a block whose origin lies outside the matrix. The statement guarded by this
// condition is not visible in this listing - presumably the iteration is skipped; verify.
131 if(
row >= (*rhs).rows() ||
column >= (*rhs).columns() )
// Block extents, clamped so the last block in each direction does not run past the matrix.
134 const size_t m(
min( rowsPerThread, (*rhs).rows() -
row ) );
135 const size_t n(
min( colsPerThread, (*rhs).columns() -
column ) );
// Case 1: SIMD enabled and both operands aligned -> aligned views on both sides.
137 if( simdEnabled && lhsAligned && rhsAligned ) {
138 auto target( submatrix<aligned>( *lhs,
row,
column, m, n ) );
139 const auto source( submatrix<aligned>( *rhs,
row,
column, m, n ) );
140 op( target, source );
// Case 2: only the left-hand side is aligned.
142 else if( simdEnabled && lhsAligned ) {
143 auto target( submatrix<aligned>( *lhs,
row,
column, m, n ) );
144 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
145 op( target, source );
// Case 3: only the right-hand side is aligned.
147 else if( simdEnabled && rhsAligned ) {
148 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
149 const auto source( submatrix<aligned>( *rhs,
row,
column, m, n ) );
150 op( target, source );
// Remaining case (the introducing 'else' is not shown in this listing): unaligned views on both sides.
153 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
154 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
155 op( target, source );
// Block-wise application of `op` to a dense left-hand side and a sparse right-hand side.
//
// lhs : target dense matrix; unaligned submatrix views of it are passed to `op`.
// rhs : source sparse matrix; matching submatrix views are passed to `op`.
// op  : callable invoked as op( target, source ) once per block.
//
// Unlike the dense/dense overload, no SIMD padding or alignment handling is visible here.
// NOTE(review): this listing omits lines of the original file (braces, most of the template
// header, and some statements are not shown).
180template<
typename MT1
185void openmpAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Thread count reported by OpenMP (stored as size_t here) and the thread grid derived from it.
191 const size_t threads( omp_get_num_threads() );
192 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
// Rows per block: ceiling of rows / threadmap.first.
194 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
195 const size_t rowsPerThread( (*rhs).rows() / threadmap.first + addon1 );
// Columns per block: ceiling of columns / threadmap.second.
197 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
198 const size_t colsPerThread( (*rhs).columns() / threadmap.second + addon2 );
// One loop iteration per block index, handed out one at a time; 'nowait' drops the
// barrier at the end of the worksharing loop.
200#pragma omp for schedule(dynamic,1) nowait
201 for(
size_t i=0; i<threads; ++i )
// Top-left corner of block i within the threadmap grid.
203 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
204 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Detects a block whose origin lies outside the matrix. The statement guarded by this
// condition is not visible in this listing - presumably the iteration is skipped; verify.
206 if(
row >= (*rhs).rows() ||
column >= (*rhs).columns() )
// Block extents, clamped to the remaining rows/columns.
// NOTE(review): the bounds check above uses rhs dimensions while the clamp uses lhs
// dimensions; this is only equivalent if lhs and rhs have identical sizes, which the
// visible lines do not check - confirm against the caller/assertions.
209 const size_t m(
min( rowsPerThread, (*lhs).rows() -
row ) );
210 const size_t n(
min( colsPerThread, (*lhs).columns() -
column ) );
// Unaligned views on both operands, then apply the operation to this block.
212 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
213 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
214 op( target, source );
// smpAssign overload for the case where the target is a dense matrix and at least one of
// the two matrix types is not SMP-assignable (see the EnableIf_t condition).
// The visible body forwards to the plain assign() on the dereferenced operands.
// NOTE(review): parts of the template header and body are not shown in this listing.
247template<
typename MT1
251inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
252 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
259 assign( *lhs, *rhs );
// smpAssign overload for the case where the target is a dense matrix and both matrix
// types are SMP-assignable (see the EnableIf_t condition).
// Two paths are visible: a direct assign() call, and an OpenMP parallel region that runs
// openmpAssign() with a lambda forwarding each block pair to assign().
// NOTE(review): the condition selecting between the two paths is not shown in this listing.
283template<
typename MT1
287inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
288 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Non-parallel path.
301 assign( *lhs, *rhs );
// Parallel path: lhs and rhs are shared among the threads of the region; each block is
// handled by the generic lambda passed as `op`.
304#pragma omp parallel shared( lhs, rhs )
305 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign overload for the case where the target is a dense matrix and at least one
// of the two matrix types is not SMP-assignable (see the EnableIf_t condition).
// The visible body forwards to the plain addAssign() on the dereferenced operands.
// NOTE(review): parts of the template header and body are not shown in this listing.
339template<
typename MT1
343inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
344 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
351 addAssign( *lhs, *rhs );
// smpAddAssign overload for the case where the target is a dense matrix and both matrix
// types are SMP-assignable (see the EnableIf_t condition).
// Two paths are visible: a direct addAssign() call, and an OpenMP parallel region that runs
// openmpAssign() with a lambda forwarding each block pair to addAssign().
// NOTE(review): the condition selecting between the two paths is not shown in this listing.
375template<
typename MT1
379inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
380 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Non-parallel path.
393 addAssign( *lhs, *rhs );
// Parallel path: lhs and rhs are shared among the threads of the region.
396#pragma omp parallel shared( lhs, rhs )
397 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign overload for the case where the target is a dense matrix and at least one
// of the two matrix types is not SMP-assignable (see the EnableIf_t condition).
// The visible body forwards to the plain subAssign() on the dereferenced operands.
// NOTE(review): parts of the template header and body are not shown in this listing.
431template<
typename MT1
435inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
436 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
443 subAssign( *lhs, *rhs );
// smpSubAssign overload for the case where the target is a dense matrix and both matrix
// types are SMP-assignable (see the EnableIf_t condition).
// Two paths are visible: a direct subAssign() call, and an OpenMP parallel region that runs
// openmpAssign() with a lambda forwarding each block pair to subAssign().
// NOTE(review): the condition selecting between the two paths is not shown in this listing.
467template<
typename MT1
471inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
472 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Non-parallel path.
485 subAssign( *lhs, *rhs );
// Parallel path: lhs and rhs are shared among the threads of the region.
488#pragma omp parallel shared( lhs, rhs )
489 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpSchurAssign overload for the case where the target is a dense matrix and at least one
// of the two matrix types is not SMP-assignable (see the EnableIf_t condition).
// The visible body forwards to the plain schurAssign() on the dereferenced operands.
// NOTE(review): parts of the template header and body are not shown in this listing.
523template<
typename MT1
527inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
528 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
535 schurAssign( *lhs, *rhs );
// smpSchurAssign overload for the case where the target is a dense matrix and both matrix
// types are SMP-assignable (see the EnableIf_t condition).
// Two paths are visible: a direct schurAssign() call, and an OpenMP parallel region that
// runs openmpAssign() with a lambda forwarding each block pair to schurAssign().
// NOTE(review): the condition selecting between the two paths is not shown in this listing.
559template<
typename MT1
563inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
564 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Non-parallel path.
577 schurAssign( *lhs, *rhs );
// Parallel path: lhs and rhs are shared among the threads of the region.
580#pragma omp parallel shared( lhs, rhs )
581 openmpAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ schurAssign( a, b ); } );
// smpMultAssign for a dense target matrix (the EnableIf_t condition only requires MT1 to
// be dense). The visible body forwards to the plain multAssign() on the dereferenced
// operands; no OpenMP-parallel variant of this operation appears in this listing.
// NOTE(review): parts of the template header and body are not shown in this listing.
613template<
typename MT1
617inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
618 -> EnableIf_t< IsDenseMatrix_v<MT1> >
625 multAssign( *lhs, *rhs );
Header file for auxiliary alias declarations.
Header file for the alignment flag enumeration.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseMatrix type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Header file for the parallel section implementation.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the SMP thread mapping functionality.
Header file for the DenseMatrix base class.
Header file for the SparseMatrix base class.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:137
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.
Definition: ParallelSection.h:254
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization.
Definition: SMP.h:68
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the matrix storage order types.
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Submatrix view.