35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 99 template<
typename MT1
104 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
110 using ET1 = ElementType_<MT1>;
111 using ET2 = ElementType_<MT2>;
113 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
114 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
116 const bool lhsAligned( (~lhs).isAligned() );
117 const bool rhsAligned( (~rhs).isAligned() );
121 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
123 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
124 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
126 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
127 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
128 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
129 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
131 for(
size_t i=0UL; i<threads.first; ++i )
133 const size_t row( i*rowsPerThread );
138 for(
size_t j=0UL; j<threads.second; ++j )
140 const size_t column( j*colsPerThread );
145 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
148 if( simdEnabled && lhsAligned && rhsAligned ) {
151 TheThreadBackend::schedule( target, source, op );
153 else if( simdEnabled && lhsAligned ) {
156 TheThreadBackend::schedule( target, source, op );
158 else if( simdEnabled && rhsAligned ) {
161 TheThreadBackend::schedule( target, source, op );
166 TheThreadBackend::schedule( target, source, op );
171 TheThreadBackend::wait();
195 template<
typename MT1
200 void threadAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
208 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
209 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
211 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
212 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
214 for(
size_t i=0UL; i<threads.first; ++i )
216 const size_t row( i*rowsPerThread );
221 for(
size_t j=0UL; j<threads.second; ++j )
223 const size_t column( j*colsPerThread );
228 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
233 TheThreadBackend::schedule( target, source, op );
237 TheThreadBackend::wait();
269 template<
typename MT1
273 inline EnableIf_< And< IsDenseMatrix<MT1>
274 , Or< Not< IsSMPAssignable<MT1> >
275 , Not< IsSMPAssignable<MT2> > > > >
276 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
283 assign( ~lhs, ~rhs );
307 template<
typename MT1
311 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
312 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
325 assign( ~lhs, ~rhs );
328 threadAssign( ~lhs, ~rhs, Assign() );
363 template<
typename MT1
367 inline EnableIf_< And< IsDenseMatrix<MT1>
368 , Or< Not< IsSMPAssignable<MT1> >
369 , Not< IsSMPAssignable<MT2> > > > >
370 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
377 addAssign( ~lhs, ~rhs );
401 template<
typename MT1
405 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
406 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
419 addAssign( ~lhs, ~rhs );
422 threadAssign( ~lhs, ~rhs, AddAssign() );
457 template<
typename MT1
461 inline EnableIf_< And< IsDenseMatrix<MT1>
462 , Or< Not< IsSMPAssignable<MT1> >
463 , Not< IsSMPAssignable<MT2> > > > >
464 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
471 subAssign( ~lhs, ~rhs );
496 template<
typename MT1
500 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
501 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
514 subAssign( ~lhs, ~rhs );
517 threadAssign( ~lhs, ~rhs, SubAssign() );
552 template<
typename MT1
556 inline EnableIf_< And< IsDenseMatrix<MT1>
557 , Or< Not< IsSMPAssignable<MT1> >
558 , Not< IsSMPAssignable<MT2> > > > >
559 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
566 schurAssign( ~lhs, ~rhs );
591 template<
typename MT1
595 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
596 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
609 schurAssign( ~lhs, ~rhs );
612 threadAssign( ~lhs, ~rhs, SchurAssign() );
645 template<
typename MT1
649 inline EnableIf_< IsDenseMatrix<MT1> >
650 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
657 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:131
Header file for the generic min algorithm.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
Header file for the Assign functor.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the AddAssign functor.
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:506
Header file for the serial section implementation.
Header file for the SchurAssign functor.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:490
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the MultAssign functor.
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.