35 #ifndef _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_HPX_DENSEMATRIX_H_ 43 #include <hpx/include/parallel_for_loop.hpp> 95 template<
typename MT1
// Applies `op` tile by tile to a dense target (lhs) and a dense source (rhs) through an
// HPX parallel for_loop. For every loop index, `op` is invoked as op( target, source ) on a
// pair of submatrix views that cover the same region of both matrices.
// NOTE(review): this listing omits several original lines (the template parameter list,
// braces, and the definitions of `threads` and `n`), so those parts are not described here.
100 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs, OP op )
102 using hpx::parallel::for_loop;
103 using hpx::parallel::execution::par;
109 using ET1 = ElementType_<MT1>;
110 using ET2 = ElementType_<MT2>;
// SIMD-aware tiling is enabled only when both matrix types report simdEnabled and their
// element types are SIMD-combinable.
112 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
113 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
// Alignment of each operand is queried once, outside the loop; the two flags select which
// submatrix flavour (aligned/unaligned) is created per tile below.
115 const bool lhsAligned( (~lhs).isAligned() );
116 const bool rhsAligned( (~rhs).isAligned() );
// threadmap.first is used as the divisor for the rows, threadmap.second for the columns.
119 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: ceiling of rows / threadmap.first. If SIMD is enabled and that share is not
// a multiple of SIMDSIZE, it is bumped up to the next multiple. The bit mask presumes
// SIMDSIZE is a power of two — TODO confirm.
121 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
122 const size_t equalShare1( (~rhs).
rows() / threadmap.first + addon1 );
123 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
124 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: the same computation, using threadmap.second.
126 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
127 const size_t equalShare2( (~rhs).
columns() / threadmap.second + addon2 );
128 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
129 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// The loop is started with the `par` execution policy over the bounds size_t(0) and `threads`.
// NOTE(review): the index parameter is declared `int` although both bounds are size_t;
// verify that this implicit conversion is intended.
131 for_loop( par,
size_t(0), threads, [&](
int i)
// The index is split into a tile row ( i / threadmap.second ) and a tile column
// ( i % threadmap.second ); scaling by the tile extents gives the tile's first row/column.
133 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
134 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height is clamped so it does not run past the last row of rhs.
// (The matching width `n` is defined on a line that is missing from this listing.)
139 const size_t m(
min( rowsPerThread, (~rhs).
rows() -
row ) );
// SIMD enabled and both operands aligned: aligned views on both sides.
142 if( simdEnabled && lhsAligned && rhsAligned ) {
143 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
144 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
145 op( target, source );
// SIMD enabled, only the target aligned: aligned target view, unaligned source view.
147 else if( simdEnabled && lhsAligned ) {
148 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
149 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
150 op( target, source );
// SIMD enabled, only the source aligned: unaligned target view, aligned source view.
152 else if( simdEnabled && rhsAligned ) {
153 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
154 const auto source( submatrix<aligned>( ~rhs,
row,
column, m, n ) );
155 op( target, source );
// Presumably the fallback branch (its `else` line is not part of this listing):
// unaligned views on both sides.
158 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
159 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
160 op( target, source );
// Applies `op` tile by tile to a dense target (lhs) and a sparse source (rhs) through an
// HPX parallel for_loop. For every loop index, `op` is invoked as op( target, source ) on a
// pair of unaligned submatrix views that cover the same region of both matrices.
// NOTE(review): this listing omits several original lines (most template parameters,
// braces, and the definitions of `threads` and `n`), so those parts are not described here.
185 template<
typename MT1
190 void hpxAssign( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs, OP op )
192 using hpx::parallel::for_loop;
193 using hpx::parallel::execution::par;
// threadmap.first is used as the divisor for the rows, threadmap.second for the columns.
200 const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
// Rows per tile: plain ceiling of rows / threadmap.first (no SIMD rounding in this overload).
202 const size_t addon1 ( ( ( (~rhs).
rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
203 const size_t rowsPerThread( (~rhs).
rows() / threadmap.first + addon1 );
// Columns per tile: plain ceiling of columns / threadmap.second.
205 const size_t addon2 ( ( ( (~rhs).
columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
206 const size_t colsPerThread( (~rhs).
columns() / threadmap.second + addon2 );
// The loop is started with the `par` execution policy over the bounds size_t(0) and `threads`.
// NOTE(review): the index parameter is declared `int` although both bounds are size_t;
// verify that this implicit conversion is intended.
208 for_loop( par,
size_t(0), threads, [&](
int i)
// The index is split into a tile row ( i / threadmap.second ) and a tile column
// ( i % threadmap.second ); scaling by the tile extents gives the tile's first row/column.
210 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
211 const size_t column( ( i % threadmap.second ) * colsPerThread );
// Tile height is clamped against the row count of lhs here, whereas every extent above is
// taken from rhs.
// NOTE(review): presumably both matrices have identical dimensions at this point — confirm.
// (The matching width `n` is defined on a line that is missing from this listing.)
216 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both views are created as unaligned submatrices before `op` is applied to them.
219 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
220 const auto source( submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
221 op( target, source );
// smpAssign overload that is enabled when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. The only visible statement forwards to
// assign( ~lhs, ~rhs ); no hpxAssign call appears in this overload.
// NOTE(review): part of the template parameter list and the braces are missing from this listing.
254 template<
typename MT1
258 inline EnableIf_< And< IsDenseMatrix<MT1>
259 , Or< Not< IsSMPAssignable<MT1> >
260 , Not< IsSMPAssignable<MT2> > > > >
261 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
268 assign( ~lhs, ~rhs );
// smpAssign overload that is enabled when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. Two paths are visible: a plain assign( ~lhs, ~rhs ) and a call to
// hpxAssign() with an Assign functor.
// NOTE(review): the condition that chooses between the two paths is on lines missing from
// this listing — confirm against the full header.
292 template<
typename MT1
296 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
297 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
308 assign( ~lhs, ~rhs );
311 hpxAssign( ~lhs, ~rhs, Assign() );
// smpAddAssign overload that is enabled when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. The only visible statement forwards to
// addAssign( ~lhs, ~rhs ); no hpxAssign call appears in this overload.
// NOTE(review): part of the template parameter list and the braces are missing from this listing.
344 template<
typename MT1
348 inline EnableIf_< And< IsDenseMatrix<MT1>
349 , Or< Not< IsSMPAssignable<MT1> >
350 , Not< IsSMPAssignable<MT2> > > > >
351 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
358 addAssign( ~lhs, ~rhs );
// smpAddAssign overload that is enabled when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. Two paths are visible: a plain addAssign( ~lhs, ~rhs ) and a call to
// hpxAssign() with an AddAssign functor.
// NOTE(review): the condition that chooses between the two paths is on lines missing from
// this listing — confirm against the full header.
382 template<
typename MT1
386 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
387 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
398 addAssign( ~lhs, ~rhs );
401 hpxAssign( ~lhs, ~rhs, AddAssign() );
// smpSubAssign overload that is enabled when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. The only visible statement forwards to
// subAssign( ~lhs, ~rhs ); no hpxAssign call appears in this overload.
// NOTE(review): part of the template parameter list and the braces are missing from this listing.
434 template<
typename MT1
438 inline EnableIf_< And< IsDenseMatrix<MT1>
439 , Or< Not< IsSMPAssignable<MT1> >
440 , Not< IsSMPAssignable<MT2> > > > >
441 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
448 subAssign( ~lhs, ~rhs );
// smpSubAssign overload that is enabled when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. Two paths are visible: a plain subAssign( ~lhs, ~rhs ) and a call to
// hpxAssign() with a SubAssign functor.
// NOTE(review): the condition that chooses between the two paths is on lines missing from
// this listing — confirm against the full header.
472 template<
typename MT1
476 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
477 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
488 subAssign( ~lhs, ~rhs );
491 hpxAssign( ~lhs, ~rhs, SubAssign() );
// smpSchurAssign overload that is enabled when MT1 is a dense matrix and at least one of
// MT1 / MT2 is not SMP-assignable. The only visible statement forwards to
// schurAssign( ~lhs, ~rhs ); no hpxAssign call appears in this overload.
// NOTE(review): part of the template parameter list and the braces are missing from this listing.
524 template<
typename MT1
528 inline EnableIf_< And< IsDenseMatrix<MT1>
529 , Or< Not< IsSMPAssignable<MT1> >
530 , Not< IsSMPAssignable<MT2> > > > >
531 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
538 schurAssign( ~lhs, ~rhs );
// smpSchurAssign overload that is enabled when MT1 is a dense matrix and both MT1 and MT2
// are SMP-assignable. Two paths are visible: a plain schurAssign( ~lhs, ~rhs ) and a call to
// hpxAssign() with a SchurAssign functor.
// NOTE(review): the condition that chooses between the two paths is on lines missing from
// this listing — confirm against the full header.
562 template<
typename MT1
566 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
567 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
578 schurAssign( ~lhs, ~rhs );
581 hpxAssign( ~lhs, ~rhs, SchurAssign() );
// smpMultAssign is enabled whenever MT1 is a dense matrix; the enabling condition does not
// involve IsSMPAssignable. The only visible statement forwards to multAssign( ~lhs, ~rhs );
// no hpxAssign call appears in this function.
// NOTE(review): part of the template parameter list and the braces are missing from this listing.
612 template<
typename MT1
616 inline EnableIf_< IsDenseMatrix<MT1> >
617 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
624 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:131
Header file for the generic min algorithm.
Header file for the alignment flag values.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
#define BLAZE_HPX_PARALLEL_MODE
Compilation switch for the HPX parallelization. This compilation switch enables/disables the paralleli...
Definition: SMP.h:148
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:506
Header file for the serial section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:490
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for SMP utility functions.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
BLAZE_ALWAYS_INLINE size_t getNumThreads()
Returns the number of threads used for thread parallel operations.
Definition: Functions.h:77
Constraint on the data type.
Header file for the function trace functionality.