// Include guard of this header, followed on the same line by the start of the
// threadAssign() overload that takes a dense right-hand side matrix.
//
// threadAssign( lhs, rhs, op ): splits the operands into threads.first row partitions
// and threads.second column partitions, hands blocks to TheThreadBackend::schedule()
// together with the operation `op`, and finally calls TheThreadBackend::wait().
// NOTE(review): this listing is an excerpt. The remaining template parameters, the
// definitions of `threads`, `target` and `source`, and the enclosing braces are not
// visible here, so the exact nesting of the statements below should be confirmed.
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 91 template<
typename MT1
96 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
// Element types of the left-hand side and right-hand side matrix types.
102 using ET1 = ElementType_t<MT1>;
103 using ET2 = ElementType_t<MT2>;
// SIMD is considered usable only if both matrix types enable it and the two
// element types are SIMD-combinable.
105 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMDSIZE is SIMDTrait<...>::size for the element type of the left-hand side.
106 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::
size );
// Alignment of both operands; used below to pick one of the scheduling branches.
108 const bool lhsAligned( (~lhs).isAligned() );
109 const bool rhsAligned( (~rhs).isAligned() );
// Rows per partition: rows / threads.first, plus one (addon1) when the division
// leaves a remainder.
113 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
114 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
// rest1 masks equalShare1 with SIMDSIZE-1 (equals equalShare1 % SIMDSIZE when SIMDSIZE
// is a power of two -- assumed, TODO confirm). With SIMD enabled and a nonzero rest,
// the share is raised to equalShare1 - rest1 + SIMDSIZE.
115 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
116 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per partition: same computation as for the rows, using threads.second.
118 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
119 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
120 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
121 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Loop over the row partitions; `row` is the first row index of partition i.
123 for(
size_t i=0UL; i<threads.first; ++i )
125 const size_t row( i*rowsPerThread );
// Loop over the column partitions; `column` is the first column index of partition j.
130 for(
size_t j=0UL; j<threads.second; ++j )
132 const size_t column( j*colsPerThread );
// m: number of rows of the current block, limited to the rows of lhs remaining
// after `row`.
137 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Scheduling cases keyed on SIMD availability and operand alignment.
// NOTE(review): the statements that build `target` and `source` for each case are
// not visible in this excerpt.
140 if( simdEnabled && lhsAligned && rhsAligned ) {
143 TheThreadBackend::schedule( target, source, op );
145 else if( simdEnabled && lhsAligned ) {
148 TheThreadBackend::schedule( target, source, op );
150 else if( simdEnabled && rhsAligned ) {
153 TheThreadBackend::schedule( target, source, op );
158 TheThreadBackend::schedule( target, source, op );
// Presumably blocks until the scheduled tasks have completed -- confirm against
// the thread backend.
163 TheThreadBackend::wait();
// threadAssign( lhs, rhs, op ) for a sparse right-hand side matrix: splits the
// operands into threads.first row partitions and threads.second column partitions,
// passes blocks to TheThreadBackend::schedule() together with `op`, and finally calls
// TheThreadBackend::wait(). No SIMD-related adjustment of the partition sizes is
// made in this overload.
// NOTE(review): this listing is an excerpt. The remaining template parameters, the
// definitions of `threads`, `target` and `source`, and the enclosing braces are not
// visible here.
187 template<
typename MT1
192 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Rows per partition: rows / threads.first, plus one (addon1) when the division
// leaves a remainder.
200 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
201 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per partition: same computation, using threads.second.
203 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
204 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Loop over the row partitions; `row` is the first row index of partition i.
206 for(
size_t i=0UL; i<threads.first; ++i )
208 const size_t row( i*rowsPerThread );
// Loop over the column partitions; `column` is the first column index of partition j.
213 for(
size_t j=0UL; j<threads.second; ++j )
215 const size_t column( j*colsPerThread );
// m: number of rows of the current block, limited to the rows of lhs remaining
// after `row`.
220 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// NOTE(review): the construction of `target` and `source` is not visible in this excerpt.
225 TheThreadBackend::schedule( target, source, op );
// Presumably blocks until the scheduled tasks have completed -- confirm against
// the thread backend.
229 TheThreadBackend::wait();
// smpAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and at
// least one of MT1 / MT2 is not SMP-assignable. It forwards to assign() on the
// unwrapped operands.
// NOTE(review): the remaining template parameters and the function braces are not
// visible in this excerpt.
261 template<
typename MT1
265 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
266 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
273 assign( ~lhs, ~rhs );
// smpAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and both
// MT1 and MT2 are SMP-assignable. Two paths are visible: a direct assign() call and
// a threadAssign() call whose per-block operation is assign().
// NOTE(review): the condition that selects between the two calls, the remaining
// template parameters and the braces are not visible in this excerpt.
297 template<
typename MT1
301 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
302 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain assign() on the unwrapped operands.
315 assign( ~lhs, ~rhs );
// Threaded path: threadAssign() with a generic lambda that applies assign() to the
// pair of arguments it is given.
318 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and at
// least one of MT1 / MT2 is not SMP-assignable. It forwards to addAssign() on the
// unwrapped operands.
// NOTE(review): the remaining template parameters and the function braces are not
// visible in this excerpt.
353 template<
typename MT1
357 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
358 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
365 addAssign( ~lhs, ~rhs );
// smpAddAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and both
// MT1 and MT2 are SMP-assignable. Two paths are visible: a direct addAssign() call
// and a threadAssign() call whose per-block operation is addAssign().
// NOTE(review): the condition that selects between the two calls, the remaining
// template parameters and the braces are not visible in this excerpt.
389 template<
typename MT1
393 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
394 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain addAssign() on the unwrapped operands.
407 addAssign( ~lhs, ~rhs );
// Threaded path: threadAssign() with a generic lambda that applies addAssign() to
// the pair of arguments it is given.
410 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and at
// least one of MT1 / MT2 is not SMP-assignable. It forwards to subAssign() on the
// unwrapped operands.
// NOTE(review): the remaining template parameters and the function braces are not
// visible in this excerpt.
445 template<
typename MT1
449 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
450 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
457 subAssign( ~lhs, ~rhs );
// smpSubAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and both
// MT1 and MT2 are SMP-assignable. Two paths are visible: a direct subAssign() call
// and a threadAssign() call whose per-block operation is subAssign().
// NOTE(review): the condition that selects between the two calls, the remaining
// template parameters and the braces are not visible in this excerpt.
482 template<
typename MT1
486 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
487 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain subAssign() on the unwrapped operands.
500 subAssign( ~lhs, ~rhs );
// Threaded path: threadAssign() with a generic lambda that applies subAssign() to
// the pair of arguments it is given.
503 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpSchurAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and at
// least one of MT1 / MT2 is not SMP-assignable. It forwards to schurAssign() on the
// unwrapped operands.
// NOTE(review): the remaining template parameters and the function braces are not
// visible in this excerpt.
538 template<
typename MT1
542 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
543 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
550 schurAssign( ~lhs, ~rhs );
// smpSchurAssign( lhs, rhs ) overload selected when MT1 is a dense matrix type and
// both MT1 and MT2 are SMP-assignable. Two paths are visible: a direct schurAssign()
// call and a threadAssign() call whose per-block operation is schurAssign().
// NOTE(review): the condition that selects between the two calls, the remaining
// template parameters and the braces are not visible in this excerpt.
575 template<
typename MT1
579 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
580 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
// Direct path: plain schurAssign() on the unwrapped operands.
593 schurAssign( ~lhs, ~rhs );
// Threaded path: threadAssign() with a generic lambda that applies schurAssign() to
// the pair of arguments it is given.
596 threadAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ schurAssign( a, b ); } );
// smpMultAssign( lhs, rhs ): enabled whenever MT1 is a dense matrix type. The only
// visible statement forwards to multAssign() on the unwrapped operands; no
// threadAssign() call appears in this function.
// NOTE(review): the remaining template parameters and the function braces are not
// visible in this excerpt.
629 template<
typename MT1
633 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
634 -> EnableIf_t< IsDenseMatrix_v<MT1> >
641 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for basic type definitions.
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:152
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:124
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.