35#ifndef _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_
36#define _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_
43#include <hpx/include/parallel_for_loop.hpp>
// Applies the operation `op` tile by tile to a dense left-hand side matrix and a
// dense right-hand side matrix from within a parallel for_loop.
// NOTE: this listing is an excerpt. The template header, the braces and several
// statements (e.g. the definition of `threads`) are on lines that are not shown.
//
// \param lhs  Target dense matrix; submatrix views of it are passed to `op` first.
// \param rhs  Source dense matrix; its rows()/columns() drive the partitioning.
// \param op   Callable invoked as op( target, source ) for each pair of tiles.
97void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
// Version-dependent selection of the HPX names for_loop and par. The remaining
// using-declaration and the #else/#endif lines are not visible in this listing.
99#if HPX_VERSION_FULL >= 0x010500
101 using hpx::execution::par;
103 using hpx::parallel::for_loop;
104 using hpx::parallel::execution::par;
109 using ET1 = ElementType_t<MT1>;
110 using ET2 = ElementType_t<MT2>;
// SIMD handling is only considered when both matrix types enable it and the two
// element types are SIMD-combinable.
112 constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
// SIMD width taken from the SIMDTrait of the lhs element type (presumably the
// number of elements per SIMD vector -- confirm against SIMDTrait).
113 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >
::size );
115 const bool lhsAligned( (*lhs).isAligned() );
116 const bool rhsAligned( (*rhs).isAligned() );
// threadmap.first and threadmap.second are used below as the number of tiles
// along the row and the column direction, respectively.
119 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
// Rows per tile: rows() / threadmap.first rounded up; with SIMD enabled the value
// is additionally rounded up to the next multiple of SIMDSIZE. The bit mask
// `& ( SIMDSIZE - 1UL )` assumes SIMDSIZE is a power of two.
121 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare1( (*rhs).rows() / threadmap.first + addon1 );
123 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
124 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column direction.
126 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
127 const size_t equalShare2( (*rhs).columns() / threadmap.second + addon2 );
128 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
129 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// One loop iteration per index in [0, threads); index i is mapped to the tile at
// grid position ( i / threadmap.second, i % threadmap.second ).
// NOTE(review): the index is declared as `int` while the loop bounds are written
// as size_t(0) and `threads` -- confirm the mixed signedness is intended.
131 for_loop( par,
size_t(0), threads, [&](
int i)
133 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
134 const size_t column( ( i % threadmap.second ) * colsPerThread );
// The tile origin lies outside the matrix. The statement guarded by this check
// is not shown in the listing (presumably an early return from the lambda).
136 if(
row >= (*rhs).rows() ||
column >= (*rhs).columns() )
// Clamp the tile extents at the lower/right border of the matrix.
139 const size_t m(
min( rowsPerThread, (*rhs).rows() -
row ) );
140 const size_t n(
min( colsPerThread, (*rhs).columns() -
column ) );
// Pick aligned or unaligned submatrix views depending on the alignment reported
// by each operand; aligned views are only used when SIMD is enabled.
142 if( simdEnabled && lhsAligned && rhsAligned ) {
143 auto target( submatrix<aligned>( *lhs,
row,
column, m, n ) );
144 const auto source( submatrix<aligned>( *rhs,
row,
column, m, n ) );
145 op( target, source );
147 else if( simdEnabled && lhsAligned ) {
148 auto target( submatrix<aligned>( *lhs,
row,
column, m, n ) );
149 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
150 op( target, source );
152 else if( simdEnabled && rhsAligned ) {
153 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
154 const auto source( submatrix<aligned>( *rhs,
row,
column, m, n ) );
155 op( target, source );
// Remaining case (the introducing `else` is not shown): both views unaligned.
158 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
159 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
160 op( target, source );
// Applies the operation `op` tile by tile to a dense left-hand side matrix and a
// sparse right-hand side matrix from within a parallel for_loop.
// NOTE: this listing is an excerpt. Most template parameters, the braces and
// several statements (e.g. the definition of `threads`) are not shown.
//
// \param lhs  Target dense matrix; submatrix views of it are passed to `op` first.
// \param rhs  Source sparse matrix; its rows()/columns() drive the partitioning.
// \param op   Callable invoked as op( target, source ) for each pair of tiles.
185template<
typename MT1
190void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
// Version-dependent selection of the HPX names for_loop and par. The remaining
// using-declaration and the #else/#endif lines are not visible in this listing.
192#if HPX_VERSION_FULL >= 0x010500
194 using hpx::execution::par;
196 using hpx::parallel::for_loop;
197 using hpx::parallel::execution::par;
// threadmap.first and threadmap.second are used below as the number of tiles
// along the row and the column direction, respectively.
203 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
// Rows per tile: rows() / threadmap.first rounded up. No SIMD rounding is
// applied in this overload.
205 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
206 const size_t rowsPerThread( (*rhs).rows() / threadmap.first + addon1 );
// Columns per tile: columns() / threadmap.second rounded up.
208 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
209 const size_t colsPerThread( (*rhs).columns() / threadmap.second + addon2 );
// One loop iteration per index in [0, threads); index i is mapped to the tile at
// grid position ( i / threadmap.second, i % threadmap.second ).
// NOTE(review): the index is declared as `int` while the loop bounds are written
// as size_t(0) and `threads` -- confirm the mixed signedness is intended.
211 for_loop( par,
size_t(0), threads, [&](
int i)
213 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
214 const size_t column( ( i % threadmap.second ) * colsPerThread );
// The tile origin lies outside the matrix. The statement guarded by this check
// is not shown in the listing (presumably an early return from the lambda).
216 if(
row >= (*rhs).rows() ||
column >= (*rhs).columns() )
// Clamp the tile extents at the matrix border.
// NOTE(review): the clamp uses the lhs dimensions while the bounds check above
// uses the rhs dimensions. This is only equivalent if lhs and rhs have the same
// size, which is not verified on the lines visible here -- confirm.
219 const size_t m(
min( rowsPerThread, (*lhs).rows() -
row ) );
220 const size_t n(
min( colsPerThread, (*lhs).columns() -
column ) );
// Both views are always created as unaligned submatrices in this overload.
222 auto target( submatrix<unaligned>( *lhs,
row,
column, m, n ) );
223 const auto source( submatrix<unaligned>( *rhs,
row,
column, m, n ) );
224 op( target, source );
// smpAssign overload that takes part in overload resolution when MT1 is a dense
// matrix and at least one of MT1/MT2 is not SMP-assignable (see the EnableIf_t
// condition). The visible body forwards to assign() on the dereferenced operands.
// NOTE: excerpt only; remaining template parameters and braces are not shown.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
257template<
typename MT1
261inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
262 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
269 assign( *lhs, *rhs );
// smpAssign overload that takes part in overload resolution when MT1 is a dense
// matrix and both MT1 and MT2 are SMP-assignable (see the EnableIf_t condition).
// The visible body contains two alternatives: a direct assign() call and a call
// to hpxAssign() with a lambda that performs assign() on each pair of tiles. The
// condition that selects between them is on lines not shown in this listing.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
293template<
typename MT1
297inline auto smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
298 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
309 assign( *lhs, *rhs );
312 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ assign( a, b ); } );
// smpAddAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and at least one of MT1/MT2 is not SMP-assignable (see the
// EnableIf_t condition). The visible body forwards to addAssign() on the
// dereferenced operands.
// NOTE: excerpt only; remaining template parameters and braces are not shown.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
345template<
typename MT1
349inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
350 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
357 addAssign( *lhs, *rhs );
// smpAddAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and both MT1 and MT2 are SMP-assignable (see the EnableIf_t
// condition). The visible body contains two alternatives: a direct addAssign()
// call and a call to hpxAssign() with a lambda that performs addAssign() on each
// pair of tiles. The condition that selects between them is not shown here.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
381template<
typename MT1
385inline auto smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
386 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
397 addAssign( *lhs, *rhs );
400 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ addAssign( a, b ); } );
// smpSubAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and at least one of MT1/MT2 is not SMP-assignable (see the
// EnableIf_t condition). The visible body forwards to subAssign() on the
// dereferenced operands.
// NOTE: excerpt only; remaining template parameters and braces are not shown.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
433template<
typename MT1
437inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
438 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
445 subAssign( *lhs, *rhs );
// smpSubAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and both MT1 and MT2 are SMP-assignable (see the EnableIf_t
// condition). The visible body contains two alternatives: a direct subAssign()
// call and a call to hpxAssign() with a lambda that performs subAssign() on each
// pair of tiles. The condition that selects between them is not shown here.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
469template<
typename MT1
473inline auto smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
474 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
485 subAssign( *lhs, *rhs );
488 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ subAssign( a, b ); } );
// smpSchurAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and at least one of MT1/MT2 is not SMP-assignable (see the
// EnableIf_t condition). The visible body forwards to schurAssign() on the
// dereferenced operands.
// NOTE: excerpt only; remaining template parameters and braces are not shown.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
521template<
typename MT1
525inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
526 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
533 schurAssign( *lhs, *rhs );
// smpSchurAssign overload that takes part in overload resolution when MT1 is a
// dense matrix and both MT1 and MT2 are SMP-assignable (see the EnableIf_t
// condition). The visible body contains two alternatives: a direct schurAssign()
// call and a call to hpxAssign() with a lambda that performs schurAssign() on
// each pair of tiles. The condition that selects between them is not shown here.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
557template<
typename MT1
561inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
562 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
573 schurAssign( *lhs, *rhs );
576 hpxAssign( *lhs, *rhs, [](
auto& a,
const auto& b ){ schurAssign( a, b ); } );
// smpMultAssign overload that takes part in overload resolution whenever MT1 is
// a dense matrix; unlike the other smp*Assign functions in this listing, its
// EnableIf_t condition does not test SMP-assignability. The visible body
// forwards to multAssign() on the dereferenced operands.
// NOTE: excerpt only; remaining template parameters and braces are not shown.
//
// \param lhs  Target matrix.
// \param rhs  Source matrix.
607template<
typename MT1
611inline auto smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
612 -> EnableIf_t< IsDenseMatrix_v<MT1> >
619 multAssign( *lhs, *rhs );
Header file for auxiliary alias declarations.
Header file for the alignment flag enumeration.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseMatrix type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Header file for the SMP thread mapping functionality.
Header file for the DenseMatrix base class.
Header file for the SparseMatrix base class.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:137
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization.
Definition: SMP.h:96
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the matrix storage order types.
Header file for SMP utility functions.
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Submatrix view.