// NOTE: this is a line-numbered listing. The leading integers are the line
// numbers of the original header, and lines absent from the listing (braces,
// further template parameters, some statements) are not shown here.
// The first listing line also carries the include guard and the HPX
// parallel_for_loop include that precede the function template.
//
// hpxAssign (dense right-hand side): partitions the operands into rectangular
// tiles and invokes op on each (target, source) submatrix pair from within an
// hpx::parallel::for_loop that is run with the par execution policy.
//   lhs - dense matrix whose submatrices are handed to op as targets
//   rhs - dense matrix whose submatrices are handed to op as sources
//   op  - callable invoked as op( target, source ) for every tile
35 #ifndef _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 43 #include <hpx/include/parallel_for_loop.hpp> 97 template<
typename MT1
102 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
104 using hpx::parallel::for_loop;
105 using hpx::parallel::execution::par;
109 using ET1 = ElementType_t<MT1>;
110 using ET2 = ElementType_t<MT2>;
// SIMD-oriented tiling is considered only if both matrix types report
// simdEnabled and their element types are SIMD-combinable.
112 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// Tile extents are padded with respect to the SIMD size of the element type of MT1.
113 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::
size );
// Alignment of each operand as a whole; selects the submatrix alignment flag below.
115 const bool lhsAligned( (~lhs).isAligned() );
116 const bool rhsAligned( (~rhs).isAligned() );
// threads is defined on a line that is not shown in this listing.
// threadmap.first is used as the divisor for the rows, threadmap.second as
// the divisor for the columns.
119 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: rows / threadmap.first, plus one if the division leaves a
// remainder. If SIMD is enabled and that share has a non-zero rest with
// respect to SIMDSIZE, the share is increased by ( SIMDSIZE - rest1 ).
// NOTE(review): the bit mask with ( SIMDSIZE - 1UL ) equals a modulo only if
// SIMDSIZE is a power of two - presumably guaranteed, confirm.
121 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare1( (~rhs).
rows() / threadmap.first + addon1 );
123 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
124 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation as for the rows, based on threadmap.second.
126 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
127 const size_t equalShare2( (~rhs).
columns() / threadmap.second + addon2 );
128 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
129 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// One loop iteration per index between size_t(0) and threads.
// NOTE(review): the loop index is declared as int although the range starts
// at size_t(0); confirm whether size_t was intended to avoid mixing signed
// and unsigned arithmetic in the offset computations below.
131 for_loop( par,
size_t(0), threads, [&](
int i)
// Tile offsets: i / threadmap.second selects the tile row and
// i % threadmap.second selects the tile column.
133 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
134 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height, limited to the rows of rhs remaining from the offset row.
// n (used below as the tile width) is defined on a line that is not shown
// in this listing.
139 const size_t m(
min( rowsPerThread, (~rhs).
rows() -
row ) );
// SIMD enabled and both operands aligned: aligned views on both sides.
142 if( simdEnabled && lhsAligned && rhsAligned ) {
143 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
144 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
145 op( target, source );
// SIMD enabled and only lhs aligned: aligned target, unaligned source.
147 else if( simdEnabled && lhsAligned ) {
148 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
149 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
150 op( target, source );
// SIMD enabled and only rhs aligned: unaligned target, aligned source.
152 else if( simdEnabled && rhsAligned ) {
153 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
154 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
155 op( target, source );
// Unaligned views on both sides. The branch keyword introducing this case is
// on a line that is not shown - presumably the final else of the chain.
158 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
159 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
160 op( target, source );
// hpxAssign (sparse right-hand side): partitions the operands into rectangular
// tiles and invokes op on each (target, source) submatrix pair from within an
// hpx::parallel::for_loop that is run with the par execution policy. No SIMD
// padding of the tile extents is performed in this overload and all
// submatrix views are created with the unaligned flag.
//   lhs - dense matrix whose submatrices are handed to op as targets
//   rhs - sparse matrix whose submatrices are handed to op as sources
//   op  - callable invoked as op( target, source ) for every tile
// NOTE: lines of the original header that are absent from this listing
// (braces, further template parameters, some statements) are not shown.
185 template<
typename MT1
190 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
192 using hpx::parallel::for_loop;
193 using hpx::parallel::execution::par;
// threads is defined on a line that is not shown in this listing.
// threadmap.first is used as the divisor for the rows, threadmap.second as
// the divisor for the columns.
198 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: rows / threadmap.first, plus one if the division leaves a remainder.
200 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
201 const size_t rowsPerThread( (~rhs).
rows() / threadmap.first + addon1 );
// Columns per tile: columns / threadmap.second, plus one if the division leaves a remainder.
203 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
204 const size_t colsPerThread( (~rhs).
columns() / threadmap.second + addon2 );
// One loop iteration per index between size_t(0) and threads.
// NOTE(review): the loop index is declared as int although the range starts
// at size_t(0); confirm whether size_t was intended.
206 for_loop( par,
size_t(0), threads, [&](
int i)
// Tile offsets: i / threadmap.second selects the tile row and
// i % threadmap.second selects the tile column.
208 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
209 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height, limited to the rows remaining from the offset row. This uses
// the row count of lhs, whereas the partitioning above uses rhs.
// NOTE(review): presumably both operands have identical dimensions - confirm.
// n (used below as the tile width) is defined on a line that is not shown
// in this listing.
214 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
217 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
218 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
219 op( target, source );
// smpAssign overload that is enabled when MT1 is a dense matrix type and at
// least one of MT1 and MT2 is not SMP-assignable. The visible body forwards
// to assign( ~lhs, ~rhs ) without any use of hpxAssign.
//   lhs - target matrix of the assignment
//   rhs - source matrix of the assignment
252 template<
typename MT1
256 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
257 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
264 assign( ~lhs, ~rhs );
// smpAssign overload that is enabled when MT1 is a dense matrix type and both
// MT1 and MT2 are SMP-assignable. The visible body contains both a call to
// assign( ~lhs, ~rhs ) and a call to hpxAssign( ~lhs, ~rhs, Assign() ).
// NOTE(review): the condition that selects between the two calls is on lines
// that are not shown in this listing - confirm against the full header.
//   lhs - target matrix of the assignment
//   rhs - source matrix of the assignment
288 template<
typename MT1
292 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
293 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
304 assign( ~lhs, ~rhs );
307 hpxAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign overload that is enabled when MT1 is a dense matrix type and at
// least one of MT1 and MT2 is not SMP-assignable. The visible body forwards
// to addAssign( ~lhs, ~rhs ) without any use of hpxAssign.
//   lhs - target matrix of the addition assignment
//   rhs - source matrix of the addition assignment
340 template<
typename MT1
344 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
345 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
352 addAssign( ~lhs, ~rhs );
// smpAddAssign overload that is enabled when MT1 is a dense matrix type and
// both MT1 and MT2 are SMP-assignable. The visible body contains both a call
// to addAssign( ~lhs, ~rhs ) and a call to hpxAssign( ~lhs, ~rhs, AddAssign() ).
// NOTE(review): the condition that selects between the two calls is on lines
// that are not shown in this listing - confirm against the full header.
//   lhs - target matrix of the addition assignment
//   rhs - source matrix of the addition assignment
376 template<
typename MT1
380 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
381 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
392 addAssign( ~lhs, ~rhs );
395 hpxAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign overload that is enabled when MT1 is a dense matrix type and at
// least one of MT1 and MT2 is not SMP-assignable. The visible body forwards
// to subAssign( ~lhs, ~rhs ) without any use of hpxAssign.
//   lhs - target matrix of the subtraction assignment
//   rhs - source matrix of the subtraction assignment
428 template<
typename MT1
432 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
433 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
440 subAssign( ~lhs, ~rhs );
// smpSubAssign overload that is enabled when MT1 is a dense matrix type and
// both MT1 and MT2 are SMP-assignable. The visible body contains both a call
// to subAssign( ~lhs, ~rhs ) and a call to hpxAssign( ~lhs, ~rhs, SubAssign() ).
// NOTE(review): the condition that selects between the two calls is on lines
// that are not shown in this listing - confirm against the full header.
//   lhs - target matrix of the subtraction assignment
//   rhs - source matrix of the subtraction assignment
464 template<
typename MT1
468 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
469 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
480 subAssign( ~lhs, ~rhs );
483 hpxAssign( ~lhs, ~rhs, SubAssign() );
// smpSchurAssign overload that is enabled when MT1 is a dense matrix type and
// at least one of MT1 and MT2 is not SMP-assignable. The visible body forwards
// to schurAssign( ~lhs, ~rhs ) without any use of hpxAssign.
//   lhs - target matrix of the Schur product assignment
//   rhs - source matrix of the Schur product assignment
516 template<
typename MT1
520 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
521 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
528 schurAssign( ~lhs, ~rhs );
// smpSchurAssign overload that is enabled when MT1 is a dense matrix type and
// both MT1 and MT2 are SMP-assignable. The visible body contains both a call
// to schurAssign( ~lhs, ~rhs ) and a call to hpxAssign( ~lhs, ~rhs, SchurAssign() ).
// NOTE(review): the condition that selects between the two calls is on lines
// that are not shown in this listing - confirm against the full header.
//   lhs - target matrix of the Schur product assignment
//   rhs - source matrix of the Schur product assignment
552 template<
typename MT1
556 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
557 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
568 schurAssign( ~lhs, ~rhs );
571 hpxAssign( ~lhs, ~rhs, SchurAssign() );
// smpMultAssign: enabled whenever MT1 is a dense matrix type; unlike the other
// smp*Assign functions in this listing, the enabling condition does not depend
// on SMP-assignability. The visible body forwards to multAssign( ~lhs, ~rhs )
// and contains no call to hpxAssign.
//   lhs - target matrix of the multiplication assignment
//   rhs - source matrix of the multiplication assignment
602 template<
typename MT1
606 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
607 -> EnableIf_t< IsDenseMatrix_v<MT1> >
614 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the alignment flag values.
Header file for basic type definitions.
Header file for the Assign functor.
Header file for the AddAssign functor.
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization. This compilation switch enables/disables the paralleli...
Definition: SMP.h:96
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
Header file for the SchurAssign functor.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
Header file for the MultAssign functor.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for SMP utility functions.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:191
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
Constraint on the data type.
Header file for the function trace functionality.