// Include guard of this header, followed by the backend of the thread-based SMP assignment of
// a dense matrix (rhs) to a dense matrix (lhs). The operands are cut into a grid of
// threads.first x threads.second submatrix tiles and an assignment of each rhs tile to the
// matching lhs tile is handed to TheThreadBackend.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 94 template<
typename MT1
98 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
// Element types of both operands and the aligned/unaligned submatrix view types of the target.
104 typedef ElementType_<MT1> ET1;
105 typedef ElementType_<MT2> ET2;
106 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
107 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMD is only considered if both matrix types enable it and their element types are
// SIMD-combinable; SIMDSIZE is taken from the SIMD trait of the target's element type.
109 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
110 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
// Alignment of each operand, queried once and used below to pick the submatrix flavor.
112 const bool lhsAligned( (~lhs).isAligned() );
113 const bool rhsAligned( (~rhs).isAligned() );
// Rows per tile: rows / threads.first rounded up, then (if SIMD is enabled) padded to the next
// multiple of SIMDSIZE. The bit mask yields the remainder only if SIMDSIZE is a power of two.
117 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
118 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
119 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
120 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
122 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
123 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
124 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
125 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
127 for(
size_t i=0UL; i<threads.first; ++i )
129 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
134 for(
size_t j=0UL; j<threads.second; ++j )
136 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
141 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned submatrix views on both sides.
144 if( simdEnabled && lhsAligned && rhsAligned ) {
145 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
146 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only the target aligned: aligned view on lhs, unaligned view on rhs.
148 else if( simdEnabled && lhsAligned ) {
149 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
150 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only the source aligned: unaligned view on lhs, aligned view on rhs.
152 else if( simdEnabled && rhsAligned ) {
153 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
154 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Unaligned views on both sides; presumably the final `else` branch (its `else` line is not
// visible in this listing).
157 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
158 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile assignments have finished -- confirm against
// TheThreadBackend.
163 TheThreadBackend::wait();
// Backend of the thread-based SMP assignment of a sparse matrix (rhs) to a dense matrix (lhs).
// The operands are cut into a grid of threads.first x threads.second submatrix tiles and an
// assignment of each rhs tile to the matching lhs tile is handed to TheThreadBackend. Only
// unaligned submatrix views are used in this overload.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
186 template<
typename MT1
190 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
196 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: rows / threads.first, rounded up when the division leaves a remainder.
200 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
201 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: columns / threads.second, rounded up in the same way.
203 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
204 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
206 for(
size_t i=0UL; i<threads.first; ++i )
208 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
213 for(
size_t j=0UL; j<threads.second; ++j )
215 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
220 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Hand the assignment of the rhs tile to the lhs tile over to the thread backend.
223 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
224 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile assignments have finished -- confirm against
// TheThreadBackend.
228 TheThreadBackend::wait();
// SMP assignment of a matrix to a dense matrix for the non-parallelizable case. The EnableIf_
// condition selects this overload when the target type MT1 is a dense matrix and at least one
// of MT1 and MT2 is not SMP-assignable; the visible body forwards to assign().
// NOTE(review): the remaining template parameters and the braces are missing from this listing.
252 template<
typename MT1
256 inline EnableIf_< And< IsDenseMatrix<MT1>
257 , Or< Not< IsSMPAssignable<MT1> >
258 , Not< IsSMPAssignable<MT2> > > > >
259 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
266 assign( ~lhs, ~rhs );
// SMP assignment of a matrix to a dense matrix for the parallelizable case. The EnableIf_
// condition selects this overload when the target type MT1 is a dense matrix and both MT1 and
// MT2 are SMP-assignable.
// NOTE(review): the visible body contains both a call to assign() and a call to
// smpAssign_backend(); the condition that chooses between them, the remaining template
// parameters, and the braces are missing from this listing -- confirm against the full header.
290 template<
typename MT1
294 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
295 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Plain assignment path.
308 assign( ~lhs, ~rhs );
// Tiled path via the backend.
311 smpAssign_backend( ~lhs, ~rhs );
// Backend of the thread-based SMP addition assignment of a dense matrix (rhs) to a dense
// matrix (lhs). The operands are cut into a grid of threads.first x threads.second submatrix
// tiles and an addition assignment of each rhs tile to the matching lhs tile is handed to
// TheThreadBackend.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
344 template<
typename MT1
348 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
// Element types of both operands and the aligned/unaligned submatrix view types of the target.
354 typedef ElementType_<MT1> ET1;
355 typedef ElementType_<MT2> ET2;
356 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
357 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMD is only considered if both matrix types enable it and their element types are
// SIMD-combinable; SIMDSIZE is taken from the SIMD trait of the target's element type.
359 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
360 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
// Alignment of each operand, queried once and used below to pick the submatrix flavor.
362 const bool lhsAligned( (~lhs).isAligned() );
363 const bool rhsAligned( (~rhs).isAligned() );
// Rows per tile: rows / threads.first rounded up, then (if SIMD is enabled) padded to the next
// multiple of SIMDSIZE. The bit mask yields the remainder only if SIMDSIZE is a power of two.
367 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
368 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
369 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
370 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
372 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
373 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
374 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
375 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
377 for(
size_t i=0UL; i<threads.first; ++i )
379 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
384 for(
size_t j=0UL; j<threads.second; ++j )
386 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
391 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned submatrix views on both sides.
394 if( simdEnabled && lhsAligned && rhsAligned ) {
395 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
396 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only the target aligned: aligned view on lhs, unaligned view on rhs.
398 else if( simdEnabled && lhsAligned ) {
399 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
400 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only the source aligned: unaligned view on lhs, aligned view on rhs.
402 else if( simdEnabled && rhsAligned ) {
403 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
404 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Unaligned views on both sides; presumably the final `else` branch (its `else` line is not
// visible in this listing).
407 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
408 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished -- confirm against
// TheThreadBackend.
413 TheThreadBackend::wait();
// Backend of the thread-based SMP addition assignment of a sparse matrix (rhs) to a dense
// matrix (lhs). The operands are cut into a grid of threads.first x threads.second submatrix
// tiles and an addition assignment of each rhs tile to the matching lhs tile is handed to
// TheThreadBackend. Only unaligned submatrix views are used in this overload.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
436 template<
typename MT1
440 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
446 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: rows / threads.first, rounded up when the division leaves a remainder.
450 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
451 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: columns / threads.second, rounded up in the same way.
453 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
454 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
456 for(
size_t i=0UL; i<threads.first; ++i )
458 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
463 for(
size_t j=0UL; j<threads.second; ++j )
465 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
470 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Hand the addition assignment of the rhs tile to the lhs tile over to the thread backend.
473 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
474 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished -- confirm against
// TheThreadBackend.
478 TheThreadBackend::wait();
// SMP addition assignment of a matrix to a dense matrix for the non-parallelizable case. The
// EnableIf_ condition selects this overload when the target type MT1 is a dense matrix and at
// least one of MT1 and MT2 is not SMP-assignable; the visible body forwards to addAssign().
// NOTE(review): the remaining template parameters and the braces are missing from this listing.
503 template<
typename MT1
507 inline EnableIf_< And< IsDenseMatrix<MT1>
508 , Or< Not< IsSMPAssignable<MT1> >
509 , Not< IsSMPAssignable<MT2> > > > >
510 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
517 addAssign( ~lhs, ~rhs );
// SMP addition assignment of a matrix to a dense matrix for the parallelizable case. The
// EnableIf_ condition selects this overload when the target type MT1 is a dense matrix and both
// MT1 and MT2 are SMP-assignable.
// NOTE(review): the visible body contains both a call to addAssign() and a call to
// smpAddAssign_backend(); the condition that chooses between them, the remaining template
// parameters, and the braces are missing from this listing -- confirm against the full header.
541 template<
typename MT1
545 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
546 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Plain addition assignment path.
559 addAssign( ~lhs, ~rhs );
// Tiled path via the backend.
562 smpAddAssign_backend( ~lhs, ~rhs );
// Backend of the thread-based SMP subtraction assignment of a dense matrix (rhs) to a dense
// matrix (lhs). The operands are cut into a grid of threads.first x threads.second submatrix
// tiles and a subtraction assignment of each rhs tile to the matching lhs tile is handed to
// TheThreadBackend.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
595 template<
typename MT1
599 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
// Element types of both operands and the aligned/unaligned submatrix view types of the target.
605 typedef ElementType_<MT1> ET1;
606 typedef ElementType_<MT2> ET2;
607 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
608 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMD is only considered if both matrix types enable it and their element types are
// SIMD-combinable; SIMDSIZE is taken from the SIMD trait of the target's element type.
610 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
611 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
// Alignment of each operand, queried once and used below to pick the submatrix flavor.
613 const bool lhsAligned( (~lhs).isAligned() );
614 const bool rhsAligned( (~rhs).isAligned() );
// Rows per tile: rows / threads.first rounded up, then (if SIMD is enabled) padded to the next
// multiple of SIMDSIZE. The bit mask yields the remainder only if SIMDSIZE is a power of two.
618 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
619 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
620 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
621 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
623 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
624 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
625 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
626 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
628 for(
size_t i=0UL; i<threads.first; ++i )
630 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
635 for(
size_t j=0UL; j<threads.second; ++j )
637 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
642 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned submatrix views on both sides.
645 if( simdEnabled && lhsAligned && rhsAligned ) {
646 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
647 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only the target aligned: aligned view on lhs, unaligned view on rhs.
649 else if( simdEnabled && lhsAligned ) {
650 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
651 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only the source aligned: unaligned view on lhs, aligned view on rhs.
653 else if( simdEnabled && rhsAligned ) {
654 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
655 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Unaligned views on both sides; presumably the final `else` branch (its `else` line is not
// visible in this listing).
658 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
659 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished -- confirm against
// TheThreadBackend.
664 TheThreadBackend::wait();
// Backend of the thread-based SMP subtraction assignment of a sparse matrix (rhs) to a dense
// matrix (lhs). The operands are cut into a grid of threads.first x threads.second submatrix
// tiles and a subtraction assignment of each rhs tile to the matching lhs tile is handed to
// TheThreadBackend. Only unaligned submatrix views are used in this overload.
// NOTE(review): this listing is lossy -- the remaining template parameters, the braces, and the
// definitions of `threads` and `n` are not visible here; confirm against the full header.
687 template<
typename MT1
691 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
697 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: rows / threads.first, rounded up when the division leaves a remainder.
701 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
702 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: columns / threads.second, rounded up in the same way.
704 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
705 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Outer loop over the grid rows; `row` is the first matrix row covered by grid row i.
707 for(
size_t i=0UL; i<threads.first; ++i )
709 const size_t row( i*rowsPerThread );
// Inner loop over the grid columns; `column` is the first matrix column of grid column j.
714 for(
size_t j=0UL; j<threads.second; ++j )
716 const size_t column( j*colsPerThread );
// Tile height, clipped to the number of rows remaining in the target from `row` on.
// NOTE(review): the matching tile width `n` is defined on a line missing from this listing.
721 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Hand the subtraction assignment of the rhs tile to the lhs tile over to the thread backend.
724 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
725 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished -- confirm against
// TheThreadBackend.
729 TheThreadBackend::wait();
// SMP subtraction assignment of a matrix to a dense matrix for the non-parallelizable case. The
// EnableIf_ condition selects this overload when the target type MT1 is a dense matrix and at
// least one of MT1 and MT2 is not SMP-assignable; the visible body forwards to subAssign().
// NOTE(review): the remaining template parameters and the braces are missing from this listing.
754 template<
typename MT1
758 inline EnableIf_< And< IsDenseMatrix<MT1>
759 , Or< Not< IsSMPAssignable<MT1> >
760 , Not< IsSMPAssignable<MT2> > > > >
761 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
768 subAssign( ~lhs, ~rhs );
// SMP subtraction assignment of a matrix to a dense matrix for the parallelizable case. The
// EnableIf_ condition selects this overload when the target type MT1 is a dense matrix and both
// MT1 and MT2 are SMP-assignable.
// NOTE(review): the visible body contains both a call to subAssign() and a call to
// smpSubAssign_backend(); the condition that chooses between them, the remaining template
// parameters, and the braces are missing from this listing -- confirm against the full header.
793 template<
typename MT1
797 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
798 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
// Plain subtraction assignment path.
811 subAssign( ~lhs, ~rhs );
// Tiled path via the backend.
814 smpSubAssign_backend( ~lhs, ~rhs );
// SMP multiplication assignment of a matrix to a dense matrix. The EnableIf_ condition enables
// this overload whenever the target type MT1 is a dense matrix; the visible body forwards to
// multAssign() without any tiling.
// NOTE(review): the remaining template parameters and the braces are missing from this listing.
847 template<
typename MT1
851 inline EnableIf_< IsDenseMatrix<MT1> >
852 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
859 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:261
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1755
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:336
Header file for the serial section implementation.
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:128
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:128
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:93
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:320
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.