35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 96 template<
typename MT1
// threadAssign (dense matrix right-hand side): walks a grid of threads.first x threads.second
// blocks and issues one TheThreadBackend::schedule( target, source, op ) call per block,
// then calls TheThreadBackend::wait().
// NOTE(review): the definitions of `threads`, `target` and `source` are on lines that are not
// part of this listing; presumably `threads` is the thread mapping for the operands and
// `target`/`source` are per-block views of lhs/rhs -- confirm against the full header.
101 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
107 using ET1 = ElementType_t<MT1>;
108 using ET2 = ElementType_t<MT2>;
// SIMD is only considered when both matrix types enable it and their element types are
// SIMD-combinable.
110 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMDTrait<...>::size for the element type of MT1 (presumably the number of elements per
// SIMD vector -- confirm in SIMDTrait).
111 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::
size );
113 const bool lhsAligned( (~lhs).isAligned() );
114 const bool rhsAligned( (~rhs).isAligned() );
// Ceiling division of the row count by threads.first: addon1 is 1 when the division leaves
// a remainder, 0 otherwise.
118 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
119 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
// rest1 masks equalShare1 with SIMDSIZE-1, which equals equalShare1 % SIMDSIZE only if
// SIMDSIZE is a power of two (assumed here). With SIMD enabled and a nonzero rest1 the
// per-thread row count is rounded up to the next multiple of SIMDSIZE.
120 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
121 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Same computation for the column dimension, using threads.second.
123 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
124 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
125 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
126 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Outer loop over the row blocks.
128 for(
size_t i=0UL; i<threads.first; ++i )
// Index of the first row of the i-th row block.
130 const size_t row( i*rowsPerThread );
// Inner loop over the column blocks.
135 for(
size_t j=0UL; j<threads.second; ++j )
// Index of the first column of the j-th column block.
137 const size_t column( j*colsPerThread );
// Number of rows in the current block, limited to the rows of lhs remaining after `row`.
142 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// The branches select on SIMD availability and operand alignment. Each one issues the same
// schedule call; how `target`/`source` differ per branch is not visible here (presumably
// aligned vs. unaligned views -- confirm).
145 if( simdEnabled && lhsAligned && rhsAligned ) {
148 TheThreadBackend::schedule( target, source, op );
150 else if( simdEnabled && lhsAligned ) {
153 TheThreadBackend::schedule( target, source, op );
155 else if( simdEnabled && rhsAligned ) {
158 TheThreadBackend::schedule( target, source, op );
163 TheThreadBackend::schedule( target, source, op );
// NOTE(review): presumably blocks until all scheduled blocks have been processed -- confirm
// in the thread backend.
168 TheThreadBackend::wait();
// threadAssign (sparse matrix right-hand side): walks a grid of threads.first x threads.second
// blocks and issues one TheThreadBackend::schedule( target, source, op ) call per block,
// then calls TheThreadBackend::wait(). Unlike the dense overload, the visible lines contain
// no SIMD or alignment handling.
// NOTE(review): `threads`, `target` and `source` are defined on lines not shown in this
// listing -- confirm their meaning against the full header.
192 template<
typename MT1
197 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Ceiling division of the row count by threads.first.
205 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
206 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Ceiling division of the column count by threads.second.
208 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
209 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Outer loop over the row blocks.
211 for(
size_t i=0UL; i<threads.first; ++i )
// Index of the first row of the i-th row block.
213 const size_t row( i*rowsPerThread );
// Inner loop over the column blocks.
218 for(
size_t j=0UL; j<threads.second; ++j )
// Index of the first column of the j-th column block.
220 const size_t column( j*colsPerThread );
// Number of rows in the current block, limited to the rows of lhs remaining after `row`.
225 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
230 TheThreadBackend::schedule( target, source, op );
// NOTE(review): presumably blocks until all scheduled blocks have been processed -- confirm
// in the thread backend.
234 TheThreadBackend::wait();
// smpAssign overload selected when MT1 is a dense matrix and at least one of MT1/MT2 is not
// SMP-assignable. The visible body forwards to assign() on the unwrapped operands.
266 template<
typename MT1
270 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
271 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
278 assign( ~lhs, ~rhs );
// smpAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain assign() call and a
// threadAssign() call with the Assign functor.
// NOTE(review): the condition choosing between the two calls is on lines not shown in this
// listing -- confirm against the full header.
302 template<
typename MT1
306 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
307 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
320 assign( ~lhs, ~rhs );
323 threadAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign overload selected when MT1 is a dense matrix and at least one of MT1/MT2 is
// not SMP-assignable. The visible body forwards to addAssign() on the unwrapped operands.
358 template<
typename MT1
362 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
363 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
370 addAssign( ~lhs, ~rhs );
// smpAddAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain addAssign() call and a
// threadAssign() call with the AddAssign functor.
// NOTE(review): the condition choosing between the two calls is on lines not shown in this
// listing -- confirm against the full header.
394 template<
typename MT1
398 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
399 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
412 addAssign( ~lhs, ~rhs );
415 threadAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign overload selected when MT1 is a dense matrix and at least one of MT1/MT2 is
// not SMP-assignable. The visible body forwards to subAssign() on the unwrapped operands.
450 template<
typename MT1
454 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
455 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
462 subAssign( ~lhs, ~rhs );
// smpSubAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain subAssign() call and a
// threadAssign() call with the SubAssign functor.
// NOTE(review): the condition choosing between the two calls is on lines not shown in this
// listing -- confirm against the full header.
487 template<
typename MT1
491 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
492 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
505 subAssign( ~lhs, ~rhs );
508 threadAssign( ~lhs, ~rhs, SubAssign() );
// smpSchurAssign overload selected when MT1 is a dense matrix and at least one of MT1/MT2 is
// not SMP-assignable. The visible body forwards to schurAssign() on the unwrapped operands.
543 template<
typename MT1
547 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
548 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
555 schurAssign( ~lhs, ~rhs );
// smpSchurAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain schurAssign() call and a
// threadAssign() call with the SchurAssign functor.
// NOTE(review): the condition choosing between the two calls is on lines not shown in this
// listing -- confirm against the full header.
580 template<
typename MT1
584 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
585 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
598 schurAssign( ~lhs, ~rhs );
601 threadAssign( ~lhs, ~rhs, SchurAssign() );
// smpMultAssign: enabled whenever MT1 is a dense matrix. The visible body forwards to
// multAssign() on the unwrapped operands; no threadAssign() call appears in this listing.
634 template<
typename MT1
638 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
639 -> EnableIf_t< IsDenseMatrix_v<MT1> >
646 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the Assign functor.
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:152
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the AddAssign functor.
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
Header file for the SchurAssign functor.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:124
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
Header file for the MultAssign functor.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.