// Include guard and the HPX parallel for_loop header, followed by the backend that applies
// an assignment operation 'op' block-wise from a dense matrix 'rhs' to a dense matrix 'lhs'.
// NOTE(review): this listing omits several source lines (the remaining template parameters,
// braces, and some local declarations such as 'threads' and 'n') - consult the full header.
35 #ifndef _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 43 #include <hpx/include/parallel_for_loop.hpp> 92 template<
typename MT1
97 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
99 using hpx::parallel::for_loop;
100 using hpx::parallel::execution::par;
104 using ET1 = ElementType_t<MT1>;
105 using ET2 = ElementType_t<MT2>;
// SIMD is considered only if both matrix types enable it and their element types are
// SIMD-combinable.
107 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD size taken from the SIMD trait of the left-hand side element type.
108 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::
size );
// Alignment of both operands selects between aligned and unaligned submatrix views below.
110 const bool lhsAligned( (~lhs).isAligned() );
111 const bool rhsAligned( (~rhs).isAligned() );
// Thread mapping: '.first' is used to divide the rows, '.second' to divide the columns.
// 'threads' is declared on a line not shown in this listing.
114 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per block: rows / threadmap.first, rounded up when the division leaves a remainder.
116 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
117 const size_t equalShare1( (~rhs).
rows() / threadmap.first + addon1 );
// With SIMD enabled, a share with a non-zero 'rest1' is padded by (SIMDSIZE - rest1).
// The mask equals the remainder modulo SIMDSIZE only if SIMDSIZE is a power of two -
// presumably guaranteed by SIMDTrait; TODO confirm.
118 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
119 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per block: same computation as for the rows, using threadmap.second.
121 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare2( (~rhs).
columns() / threadmap.second + addon2 );
123 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
124 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Runs the lambda for every index in [0, threads) with the 'par' execution policy.
// NOTE(review): the index parameter is 'int' while the lower bound is size_t(0) - confirm
// the signed/unsigned mix is intended.
126 for_loop( par,
size_t(0), threads, [&](
int i)
// Index i is mapped to a block: block row i / threadmap.second, block column
// i % threadmap.second; multiplying by the block sizes gives the block's top-left element.
128 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
129 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Block height, limited to the rows remaining after 'row'. The matching width 'n' is
// declared on a line not shown in this listing.
134 const size_t m(
min( rowsPerThread, (~rhs).
rows() -
row ) );
// Both operands aligned: aligned views on both sides.
137 if( simdEnabled && lhsAligned && rhsAligned ) {
138 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
139 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
140 op( target, source );
// Only the left-hand side aligned: aligned target, unaligned source.
142 else if( simdEnabled && lhsAligned ) {
143 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
144 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
145 op( target, source );
// Only the right-hand side aligned: unaligned target, aligned source.
147 else if( simdEnabled && rhsAligned ) {
148 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
149 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
150 op( target, source );
// Unaligned views on both sides; presumably the final 'else' branch (its opening line is
// not shown in this listing).
153 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
154 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
155 op( target, source );
// Backend that applies an assignment operation 'op' block-wise from a sparse matrix 'rhs' to
// a dense matrix 'lhs'. Block sizes are plain rounded-up shares (no SIMD padding is applied
// here) and only unaligned submatrix views are used.
// NOTE(review): this listing omits several source lines (the remaining template parameters,
// braces, and the declarations of 'threads' and 'n') - consult the full header.
180 template<
typename MT1
185 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
187 using hpx::parallel::for_loop;
188 using hpx::parallel::execution::par;
// Thread mapping: '.first' is used to divide the rows, '.second' to divide the columns.
193 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per block: rows / threadmap.first, rounded up when the division leaves a remainder.
195 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
196 const size_t rowsPerThread( (~rhs).
rows() / threadmap.first + addon1 );
// Columns per block: same computation, using threadmap.second.
198 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
199 const size_t colsPerThread( (~rhs).
columns() / threadmap.second + addon2 );
// Runs the lambda for every index in [0, threads) with the 'par' execution policy.
// NOTE(review): the index parameter is 'int' while the lower bound is size_t(0) - confirm
// the signed/unsigned mix is intended.
201 for_loop( par,
size_t(0), threads, [&](
int i)
// Index i is mapped to a block: block row i / threadmap.second, block column
// i % threadmap.second.
203 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
204 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Block height, limited to the rows remaining after 'row'. Note that the row count is read
// from 'lhs' here, whereas the block sizes above are derived from 'rhs'; presumably both
// operands have identical dimensions - verify against the callers.
209 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Apply 'op' to the matching unaligned blocks of target and source.
212 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
213 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
214 op( target, source );
// smpAssign overload for a dense left-hand side where at least one of the two matrix types
// is not SMP-assignable (see the EnableIf_t condition): forwards directly to assign().
// NOTE(review): remaining template parameters and braces are not shown in this listing.
247 template<
typename MT1
251 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
252 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
259 assign( ~lhs, ~rhs );
// smpAssign overload for a dense left-hand side where both matrix types are SMP-assignable
// (see the EnableIf_t condition). Two paths are visible: a direct assign() call and a call
// to hpxAssign() with a lambda that assigns one block to another.
// NOTE(review): the condition selecting between the two paths is on lines not shown in
// this listing - presumably a serial fallback versus the parallel path; confirm in the header.
283 template<
typename MT1
287 inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
288 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
299 assign( ~lhs, ~rhs );
302 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign overload for a dense left-hand side where at least one of the two matrix
// types is not SMP-assignable (see the EnableIf_t condition): forwards directly to addAssign().
// NOTE(review): remaining template parameters and braces are not shown in this listing.
335 template<
typename MT1
339 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
340 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
347 addAssign( ~lhs, ~rhs );
// smpAddAssign overload for a dense left-hand side where both matrix types are
// SMP-assignable (see the EnableIf_t condition). Two paths are visible: a direct addAssign()
// call and a call to hpxAssign() with a lambda that add-assigns one block to another.
// NOTE(review): the condition selecting between the two paths is on lines not shown in
// this listing - presumably a serial fallback versus the parallel path; confirm in the header.
371 template<
typename MT1
375 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
376 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
387 addAssign( ~lhs, ~rhs );
390 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign overload for a dense left-hand side where at least one of the two matrix
// types is not SMP-assignable (see the EnableIf_t condition): forwards directly to subAssign().
// NOTE(review): remaining template parameters and braces are not shown in this listing.
423 template<
typename MT1
427 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
428 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
435 subAssign( ~lhs, ~rhs );
// smpSubAssign overload for a dense left-hand side where both matrix types are
// SMP-assignable (see the EnableIf_t condition). Two paths are visible: a direct subAssign()
// call and a call to hpxAssign() with a lambda that sub-assigns one block to another.
// NOTE(review): the condition selecting between the two paths is on lines not shown in
// this listing - presumably a serial fallback versus the parallel path; confirm in the header.
459 template<
typename MT1
463 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
464 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
475 subAssign( ~lhs, ~rhs );
478 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpSchurAssign overload for a dense left-hand side where at least one of the two matrix
// types is not SMP-assignable (see the EnableIf_t condition): forwards directly to
// schurAssign().
// NOTE(review): remaining template parameters and braces are not shown in this listing.
511 template<
typename MT1
515 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
516 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
523 schurAssign( ~lhs, ~rhs );
// smpSchurAssign overload for a dense left-hand side where both matrix types are
// SMP-assignable (see the EnableIf_t condition). Two paths are visible: a direct
// schurAssign() call and a call to hpxAssign() with a lambda that Schur-assigns one block
// to another.
// NOTE(review): the condition selecting between the two paths is on lines not shown in
// this listing - presumably a serial fallback versus the parallel path; confirm in the header.
547 template<
typename MT1
551 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
552 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
563 schurAssign( ~lhs, ~rhs );
566 hpxAssign( ~lhs, ~rhs, [](
auto& a,
const auto& b ){ schurAssign( a, b ); } );
// smpMultAssign for a dense left-hand side (see the EnableIf_t condition): the only visible
// statement forwards to multAssign(); no hpxAssign() call appears in this overload.
// NOTE(review): remaining template parameters and braces are not shown in this listing.
597 template<
typename MT1
601 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
602 -> EnableIf_t< IsDenseMatrix_v<MT1> >
609 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the alignment flag values.
Header file for basic type definitions.
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization. This compilation switch enables/disables the paralleli...
Definition: SMP.h:96
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
Header file for SMP utility functions.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
Constraint on the data type.
Header file for the function trace functionality.