35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_ 93 template<
typename MT1
97 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
103 using ET1 = ElementType_<MT1>;
104 using ET2 = ElementType_<MT2>;
106 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
107 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
109 const bool lhsAligned( (~lhs).isAligned() );
110 const bool rhsAligned( (~rhs).isAligned() );
114 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
115 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
116 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
117 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
119 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
120 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
121 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
122 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
124 for(
size_t i=0UL; i<threads.first; ++i )
126 const size_t row( i*rowsPerThread );
131 for(
size_t j=0UL; j<threads.second; ++j )
133 const size_t column( j*colsPerThread );
138 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
141 if( simdEnabled && lhsAligned && rhsAligned ) {
142 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
143 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
145 else if( simdEnabled && lhsAligned ) {
146 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
147 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
149 else if( simdEnabled && rhsAligned ) {
150 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
151 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
154 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
155 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
160 TheThreadBackend::wait();
183 template<
typename MT1
187 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
195 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
196 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
198 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
199 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
201 for(
size_t i=0UL; i<threads.first; ++i )
203 const size_t row( i*rowsPerThread );
208 for(
size_t j=0UL; j<threads.second; ++j )
210 const size_t column( j*colsPerThread );
215 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
218 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
219 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
223 TheThreadBackend::wait();
247 template<
typename MT1
251 inline EnableIf_< And< IsDenseMatrix<MT1>
252 , Or< Not< IsSMPAssignable<MT1> >
253 , Not< IsSMPAssignable<MT2> > > > >
254 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
261 assign( ~lhs, ~rhs );
285 template<
typename MT1
289 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
290 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
303 assign( ~lhs, ~rhs );
306 smpAssign_backend( ~lhs, ~rhs );
339 template<
typename MT1
343 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
349 using ET1 = ElementType_<MT1>;
350 using ET2 = ElementType_<MT2>;
352 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
353 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
355 const bool lhsAligned( (~lhs).isAligned() );
356 const bool rhsAligned( (~rhs).isAligned() );
360 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
361 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
362 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
363 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
365 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
366 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
367 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
368 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
370 for(
size_t i=0UL; i<threads.first; ++i )
372 const size_t row( i*rowsPerThread );
377 for(
size_t j=0UL; j<threads.second; ++j )
379 const size_t column( j*colsPerThread );
384 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
387 if( simdEnabled && lhsAligned && rhsAligned ) {
388 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
389 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
391 else if( simdEnabled && lhsAligned ) {
392 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
393 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
395 else if( simdEnabled && rhsAligned ) {
396 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
397 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
400 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
401 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
406 TheThreadBackend::wait();
429 template<
typename MT1
433 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
441 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
442 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
444 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
445 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
447 for(
size_t i=0UL; i<threads.first; ++i )
449 const size_t row( i*rowsPerThread );
454 for(
size_t j=0UL; j<threads.second; ++j )
456 const size_t column( j*colsPerThread );
461 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
464 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
465 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
469 TheThreadBackend::wait();
494 template<
typename MT1
498 inline EnableIf_< And< IsDenseMatrix<MT1>
499 , Or< Not< IsSMPAssignable<MT1> >
500 , Not< IsSMPAssignable<MT2> > > > >
501 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
508 addAssign( ~lhs, ~rhs );
532 template<
typename MT1
536 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
537 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
550 addAssign( ~lhs, ~rhs );
553 smpAddAssign_backend( ~lhs, ~rhs );
586 template<
typename MT1
590 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
596 using ET1 = ElementType_<MT1>;
597 using ET2 = ElementType_<MT2>;
599 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
600 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
602 const bool lhsAligned( (~lhs).isAligned() );
603 const bool rhsAligned( (~rhs).isAligned() );
607 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
608 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
609 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
610 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
612 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
613 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
614 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
615 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
617 for(
size_t i=0UL; i<threads.first; ++i )
619 const size_t row( i*rowsPerThread );
624 for(
size_t j=0UL; j<threads.second; ++j )
626 const size_t column( j*colsPerThread );
631 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
634 if( simdEnabled && lhsAligned && rhsAligned ) {
635 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
636 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
638 else if( simdEnabled && lhsAligned ) {
639 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
640 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
642 else if( simdEnabled && rhsAligned ) {
643 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
644 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
647 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
648 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
653 TheThreadBackend::wait();
676 template<
typename MT1
680 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
688 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
689 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
691 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
692 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
694 for(
size_t i=0UL; i<threads.first; ++i )
696 const size_t row( i*rowsPerThread );
701 for(
size_t j=0UL; j<threads.second; ++j )
703 const size_t column( j*colsPerThread );
708 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
711 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
712 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
716 TheThreadBackend::wait();
741 template<
typename MT1
745 inline EnableIf_< And< IsDenseMatrix<MT1>
746 , Or< Not< IsSMPAssignable<MT1> >
747 , Not< IsSMPAssignable<MT2> > > > >
748 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
755 subAssign( ~lhs, ~rhs );
780 template<
typename MT1
784 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
785 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
798 subAssign( ~lhs, ~rhs );
801 smpSubAssign_backend( ~lhs, ~rhs );
834 template<
typename MT1
838 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
844 using ET1 = ElementType_<MT1>;
845 using ET2 = ElementType_<MT2>;
847 constexpr
bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
848 constexpr
size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::
size );
850 const bool lhsAligned( (~lhs).isAligned() );
851 const bool rhsAligned( (~rhs).isAligned() );
855 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
856 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
857 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
858 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
860 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
861 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
862 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
863 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
865 for(
size_t i=0UL; i<threads.first; ++i )
867 const size_t row( i*rowsPerThread );
872 for(
size_t j=0UL; j<threads.second; ++j )
874 const size_t column( j*colsPerThread );
879 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
882 if( simdEnabled && lhsAligned && rhsAligned ) {
883 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
884 TheThreadBackend::scheduleSchurAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
886 else if( simdEnabled && lhsAligned ) {
887 auto target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
888 TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
890 else if( simdEnabled && rhsAligned ) {
891 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
892 TheThreadBackend::scheduleSchurAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
895 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
896 TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
901 TheThreadBackend::wait();
924 template<
typename MT1
928 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
936 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
937 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
939 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
940 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
942 for(
size_t i=0UL; i<threads.first; ++i )
944 const size_t row( i*rowsPerThread );
949 for(
size_t j=0UL; j<threads.second; ++j )
951 const size_t column( j*colsPerThread );
956 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
959 auto target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
960 TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
964 TheThreadBackend::wait();
989 template<
typename MT1
993 inline EnableIf_< And< IsDenseMatrix<MT1>
994 , Or< Not< IsSMPAssignable<MT1> >
995 , Not< IsSMPAssignable<MT2> > > > >
996 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
1003 schurAssign( ~lhs, ~rhs );
1028 template<
typename MT1
1032 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
1033 smpSchurAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
1046 schurAssign( ~lhs, ~rhs );
1049 smpSchurAssign_backend( ~lhs, ~rhs );
1082 template<
typename MT1
1086 inline EnableIf_< IsDenseMatrix<MT1> >
1087 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
1094 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:340
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:324
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.