35#ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
36#define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
// threadAssign (dense matrix <- dense matrix).
// Partitions the operation into a grid of row/column tiles and hands one
// task per tile to TheThreadBackend, applying `op` to each (target, source) pair.
//
// NOTE(review): this listing is incomplete. The numbers fused to the start of
// the statements are original line numbers and they skip (96, 102, 103, 105, ...),
// so the template header, the braces, and the definitions of `threads`, `target`
// and `source` are not visible here. Comments below describe only what is shown.
//
// lhs  target dense matrix (written through the scheduled tasks)
// rhs  source dense matrix
// op   callable forwarded to every scheduled task
96void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
// Element types of the two operands; used for the SIMD compatibility check.
102 using ET1 = ElementType_t<MT1>;
103 using ET2 = ElementType_t<MT2>;
// SIMD is considered only if both matrix types report simdEnabled and their
// element types are SIMD-combinable.
105 constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from the left-hand side element type.
106 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >
::size );
// Run-time alignment of both operands; selects the scheduling branch further down.
108 const bool lhsAligned( (*lhs).isAligned() );
109 const bool rhsAligned( (*rhs).isAligned() );
// Row partitioning: addon1 is 1 when the rows do not divide evenly among
// threads.first, which makes equalShare1 the rounded-up rows per thread.
113 const size_t addon1 ( ( ( (*rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
114 const size_t equalShare1( (*rhs).rows() / threads.first + addon1 );
// Masking with SIMDSIZE-1 yields equalShare1 % SIMDSIZE, assuming SIMDSIZE is
// a power of two (TODO confirm against SIMDTrait).
115 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
// With SIMD enabled and a non-zero remainder, the share is bumped up to the
// next multiple of SIMDSIZE; otherwise the plain share is used.
116 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Column partitioning: same scheme as for the rows, using threads.second.
118 const size_t addon2 ( ( ( (*rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
119 const size_t equalShare2( (*rhs).columns() / threads.second + addon2 );
120 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
121 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Outer loop over the row tiles.
123 for(
size_t i=0UL; i<threads.first; ++i )
// First row of tile i.
125 const size_t row( i*rowsPerThread );
// Tile starts at or past the last row (possible because the share is rounded up).
// NOTE(review): the statement guarded by this check is not visible in this listing.
127 if(
row >= (*lhs).rows() )
// Inner loop over the column tiles.
130 for(
size_t j=0UL; j<threads.second; ++j )
// First column of tile j.
132 const size_t column( j*colsPerThread );
// NOTE(review): the column bound is read from rhs while the row bound above is
// read from lhs; the dense/sparse overload in this file uses lhs for both.
// The two agree only when lhs and rhs have identical dimensions - confirm
// that this is a precondition.
134 if(
column >= (*rhs).columns() )
// Tile extent, clipped so the last tile does not run past the matrix edge.
137 const size_t m(
min( rowsPerThread, (*lhs).rows() -
row ) );
138 const size_t n(
min( colsPerThread, (*rhs).columns() -
column ) );
// Four alignment cases follow. The visible schedule call is identical in each;
// presumably the hidden lines build `target`/`source` with different
// alignment flags for the m x n tile - TODO confirm against the full source.
140 if( simdEnabled && lhsAligned && rhsAligned ) {
143 TheThreadBackend::schedule( target, source, op );
145 else if( simdEnabled && lhsAligned ) {
148 TheThreadBackend::schedule( target, source, op );
150 else if( simdEnabled && rhsAligned ) {
153 TheThreadBackend::schedule( target, source, op );
// Remaining case (its `else` line is not visible in this listing).
158 TheThreadBackend::schedule( target, source, op );
// Presumably blocks until every scheduled task has finished - verify against
// the thread backend.
163 TheThreadBackend::wait();
// threadAssign (dense matrix <- sparse matrix).
// Partitions the operation into a grid of row/column tiles and hands one
// task per tile to TheThreadBackend, applying `op` to each (target, source) pair.
// No SIMD-related rounding of the tile sizes is visible in this overload.
//
// NOTE(review): this listing is incomplete. The fused numeric prefixes are
// original line numbers and they skip, so the rest of the template header, the
// braces, and the definitions of `threads`, `target` and `source` are not visible.
//
// lhs  target dense matrix (written through the scheduled tasks)
// rhs  source sparse matrix
// op   callable forwarded to every scheduled task
187template<
typename MT1
192void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Rows per thread: integer division rounded up (addon1 is 1 on a non-zero remainder).
200 const size_t addon1 ( ( ( (*rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
201 const size_t rowsPerThread( (*rhs).rows() / threads.first + addon1 );
// Columns per thread: same rounding-up scheme, using threads.second.
203 const size_t addon2 ( ( ( (*rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
204 const size_t colsPerThread( (*rhs).columns() / threads.second + addon2 );
// Outer loop over the row tiles.
206 for(
size_t i=0UL; i<threads.first; ++i )
// First row of tile i.
208 const size_t row( i*rowsPerThread );
// Tile starts at or past the last row (possible because the share is rounded up).
// NOTE(review): the statement guarded by this check is not visible in this listing.
210 if(
row >= (*lhs).rows() )
// Inner loop over the column tiles.
213 for(
size_t j=0UL; j<threads.second; ++j )
// First column of tile j.
215 const size_t column( j*colsPerThread );
// Same bounds check for the columns; the guarded statement is not visible either.
217 if(
column >= (*lhs).columns() )
// Tile extent, clipped so the last tile does not run past the matrix edge.
220 const size_t m(
min( rowsPerThread, (*lhs).rows() -
row ) );
221 const size_t n(
min( colsPerThread, (*lhs).columns() -
column ) );
// One task per tile. Presumably `target`/`source` are m x n views starting at
// (row, column) - their definitions are not visible here, TODO confirm.
225 TheThreadBackend::schedule( target, source, op );
// Presumably blocks until every scheduled task has finished - verify against
// the thread backend.
229 TheThreadBackend::wait();
// smpAssign, fallback overload.
// Selected when the left-hand side is a dense matrix and at least one of the
// two operand types is not SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the rest of the template header and the braces are not
// visible in this listing.
//
// lhs  target matrix
// rhs  source matrix
261template<
typename MT1
265inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
266 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
// Forwards directly to assign(); no threadAssign call is visible in this overload.
273 assign( *lhs, *rhs );
// smpAssign, parallel-capable overload.
// Selected when the left-hand side is a dense matrix and both operand types
// are SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the condition that chooses between the two calls below is not
// visible in this listing (the fused original line numbers skip from 302 to 315
// and from 315 to 318).
//
// lhs  target matrix
// rhs  source matrix
297template<
typename MT1
301inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
302 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain assign() on the whole operands.
315 assign( *lhs, *rhs );
// Threaded path: threadAssign() receives a generic lambda that calls assign()
// on the pair of arguments it is given.
318 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign, fallback overload.
// Selected when the left-hand side is a dense matrix and at least one of the
// two operand types is not SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the rest of the template header and the braces are not
// visible in this listing.
//
// lhs  target matrix
// rhs  matrix to be added
353template<
typename MT1
357inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
358 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
// Forwards directly to addAssign(); no threadAssign call is visible in this overload.
365 addAssign( *lhs, *rhs );
// smpAddAssign, parallel-capable overload.
// Selected when the left-hand side is a dense matrix and both operand types
// are SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the condition that chooses between the two calls below is not
// visible in this listing (the fused original line numbers skip from 394 to 407
// and from 407 to 410).
//
// lhs  target matrix
// rhs  matrix to be added
389template<
typename MT1
393inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
394 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain addAssign() on the whole operands.
407 addAssign( *lhs, *rhs );
// Threaded path: threadAssign() receives a generic lambda that calls
// addAssign() on the pair of arguments it is given.
410 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign, fallback overload.
// Selected when the left-hand side is a dense matrix and at least one of the
// two operand types is not SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the rest of the template header and the braces are not
// visible in this listing.
//
// lhs  target matrix
// rhs  matrix to be subtracted
445template<
typename MT1
449inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
450 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
// Forwards directly to subAssign(); no threadAssign call is visible in this overload.
457 subAssign( *lhs, *rhs );
// smpSubAssign, parallel-capable overload.
// Selected when the left-hand side is a dense matrix and both operand types
// are SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the condition that chooses between the two calls below is not
// visible in this listing (the fused original line numbers skip from 487 to 500
// and from 500 to 503).
//
// lhs  target matrix
// rhs  matrix to be subtracted
482template<
typename MT1
486inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
487 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain subAssign() on the whole operands.
500 subAssign( *lhs, *rhs );
// Threaded path: threadAssign() receives a generic lambda that calls
// subAssign() on the pair of arguments it is given.
503 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpSchurAssign, fallback overload.
// Selected when the left-hand side is a dense matrix and at least one of the
// two operand types is not SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the rest of the template header and the braces are not
// visible in this listing.
//
// lhs  target matrix
// rhs  right-hand side matrix of the Schur product
538template<
typename MT1
542inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
543 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
// Forwards directly to schurAssign(); no threadAssign call is visible in this overload.
550 schurAssign( *lhs, *rhs );
// smpSchurAssign, parallel-capable overload.
// Selected when the left-hand side is a dense matrix and both operand types
// are SMP-assignable (see the EnableIf_t condition).
// NOTE(review): the condition that chooses between the two calls below is not
// visible in this listing (the fused original line numbers skip from 580 to 593
// and from 593 to 596).
//
// lhs  target matrix
// rhs  right-hand side matrix of the Schur product
575template<
typename MT1
579inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
580 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain schurAssign() on the whole operands.
593 schurAssign( *lhs, *rhs );
// Threaded path: threadAssign() receives a generic lambda that calls
// schurAssign() on the pair of arguments it is given.
596 threadAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ schurAssign( a, b ); } );
// smpMultAssign.
// Enabled whenever the left-hand side is a dense matrix; unlike the other
// smp*Assign functions shown in this listing, the condition does not involve
// IsSMPAssignable_v and only one overload is visible.
// NOTE(review): the rest of the template header and the braces are not
// visible in this listing.
//
// lhs  target matrix
// rhs  right-hand side matrix of the multiplication
629template<
typename MT1
633inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
634 -> EnableIf_t< IsDenseMatrix_v<MT1> >
// Forwards directly to multAssign(); no threadAssign call is visible here.
641 multAssign( *lhs, *rhs );
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseMatrix type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Header file for the parallel section implementation.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the C++11 and Boost thread backend.
Header file for the SMP thread mapping functionality.
Header file for the DenseMatrix base class.
Header file for the SparseMatrix base class.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:137
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.
Definition: ParallelSection.h:254
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization.
Definition: SMP.h:124
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization.
Definition: SMP.h:152
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
Header file for the matrix storage order types.
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Submatrix view.