35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
// Backend of the thread-based SMP assignment of a dense matrix (rhs) to a dense matrix (lhs).
// The work is cut into a threads.first x threads.second grid of submatrix tiles; each tile is
// handed to TheThreadBackend::scheduleAssign and the function ends with TheThreadBackend::wait().
// NOTE(review): this listing is missing lines (rest of the template parameter list, braces, the
// definition of `threads`, the definition of `n`, and possibly bounds checks) - confirm against
// the original header before relying on these comments.
95 template<
typename MT1
99 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
105 typedef ElementType_<MT1> ET1;
106 typedef ElementType_<MT2> ET2;
107 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
108 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMDSIZE is taken from the SIMD trait of the left-hand side element type.
110 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<MT1> >
::size };
// SIMD handling is only considered when both matrix types report simdEnabled and the two
// element types are identical.
112 const bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSame<ET1,ET2>::value );
113 const bool lhsAligned ( (~lhs).isAligned() );
114 const bool rhsAligned ( (~rhs).isAligned() );
// Rows per tile: ceiling of rows/threads.first, then (SIMD case only) raised to the next
// multiple of SIMDSIZE. The bit mask assumes SIMDSIZE is a power of two - TODO confirm.
118 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
119 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
120 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
121 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
123 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
124 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
125 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
126 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Walk the tile grid; tile (i,j) starts at element (i*rowsPerThread, j*colsPerThread).
128 for(
size_t i=0UL; i<threads.first; ++i )
130 const size_t row( i*rowsPerThread );
135 for(
size_t j=0UL; j<threads.second; ++j )
137 const size_t column( j*colsPerThread );
// Clamp the tile height so it does not run past the last row of lhs.
// NOTE(review): `n` (presumably the clamped tile width) is used below but not defined in the
// visible lines.
142 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned views on both sides.
145 if( simdEnabled && lhsAligned && rhsAligned ) {
146 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
147 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only lhs aligned: aligned target, unaligned source view.
149 else if( simdEnabled && lhsAligned ) {
150 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
151 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only rhs aligned: unaligned target, aligned source view.
153 else if( simdEnabled && rhsAligned ) {
154 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
155 TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Remaining case (presumably the final `else`, whose line is not visible): unaligned views
// on both sides.
158 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
159 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile assignments have finished - confirm in the
// thread backend.
164 TheThreadBackend::wait();
// Backend of the thread-based SMP assignment of a sparse matrix (rhs) to a dense matrix (lhs).
// Tiles are always created as unaligned submatrix views; no SIMD padding of the tile sizes is
// performed in this overload.
// NOTE(review): this listing is missing lines (template parameters, braces, the definitions of
// `threads` and `n`) - confirm against the original header.
187 template<
typename MT1
191 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
197 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: ceiling of rows/threads.first.
201 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
202 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: ceiling of columns/threads.second.
204 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
205 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Walk the threads.first x threads.second tile grid.
207 for(
size_t i=0UL; i<threads.first; ++i )
209 const size_t row( i*rowsPerThread );
214 for(
size_t j=0UL; j<threads.second; ++j )
216 const size_t column( j*colsPerThread );
// Clamp the tile height to the rows remaining in lhs.
// NOTE(review): `n` is used below but its definition is not visible here.
221 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
224 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
225 TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile assignments have finished - confirm.
229 TheThreadBackend::wait();
// smpAssign overload selected when MT1 is a dense matrix and at least one of MT1 / MT2 is not
// SMP-assignable. It forwards directly to assign() on the unwrapped operands; no thread backend
// call is visible in this overload.
// NOTE(review): template parameter list and braces are incomplete in this listing.
253 template<
typename MT1
257 inline EnableIf_< And< IsDenseMatrix<MT1>
258 , Or< Not< IsSMPAssignable<MT1> >
259 , Not< IsSMPAssignable<MT2> > > > >
260 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
267 assign( ~lhs, ~rhs );
// smpAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are SMP-assignable.
// The visible body contains both a plain assign() call and a call to smpAssign_backend().
// NOTE(review): the condition that chooses between the two calls is not visible in this listing -
// consult the original header for it.
291 template<
typename MT1
295 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
296 smpAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
309 assign( ~lhs, ~rhs );
312 smpAssign_backend( ~lhs, ~rhs );
// Backend of the thread-based SMP addition assignment of a dense matrix (rhs) to a dense
// matrix (lhs). The work is cut into a threads.first x threads.second grid of submatrix tiles;
// each tile is handed to TheThreadBackend::scheduleAddAssign, followed by TheThreadBackend::wait().
// NOTE(review): this listing is missing lines (template parameters, braces, the definitions of
// `threads` and `n`) - confirm against the original header.
345 template<
typename MT1
349 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
355 typedef ElementType_<MT1> ET1;
356 typedef ElementType_<MT2> ET2;
357 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
358 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMDSIZE is taken from the SIMD trait of the left-hand side element type.
360 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<MT1> >
::size };
// SIMD handling requires both matrix types to report simdEnabled and identical element types.
362 const bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSame<ET1,ET2>::value );
363 const bool lhsAligned ( (~lhs).isAligned() );
364 const bool rhsAligned ( (~rhs).isAligned() );
// Rows per tile: ceiling of rows/threads.first, then (SIMD case only) raised to the next
// multiple of SIMDSIZE. The bit mask assumes SIMDSIZE is a power of two - TODO confirm.
368 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
369 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
370 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
371 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
373 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
374 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
375 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
376 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Walk the tile grid; tile (i,j) starts at element (i*rowsPerThread, j*colsPerThread).
378 for(
size_t i=0UL; i<threads.first; ++i )
380 const size_t row( i*rowsPerThread );
385 for(
size_t j=0UL; j<threads.second; ++j )
387 const size_t column( j*colsPerThread );
// Clamp the tile height to the rows remaining in lhs.
// NOTE(review): `n` is used below but its definition is not visible here.
392 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned views on both sides.
395 if( simdEnabled && lhsAligned && rhsAligned ) {
396 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
397 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only lhs aligned: aligned target, unaligned source view.
399 else if( simdEnabled && lhsAligned ) {
400 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
401 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only rhs aligned: unaligned target, aligned source view.
403 else if( simdEnabled && rhsAligned ) {
404 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
405 TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Remaining case (presumably the final `else`, whose line is not visible): unaligned views
// on both sides.
408 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
409 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished - confirm.
414 TheThreadBackend::wait();
// Backend of the thread-based SMP addition assignment of a sparse matrix (rhs) to a dense
// matrix (lhs). Tiles are always unaligned submatrix views; tile sizes are not SIMD-padded here.
// NOTE(review): this listing is missing lines (template parameters, braces, the definitions of
// `threads` and `n`) - confirm against the original header.
437 template<
typename MT1
441 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
447 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: ceiling of rows/threads.first.
451 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
452 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: ceiling of columns/threads.second.
454 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
455 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Walk the threads.first x threads.second tile grid.
457 for(
size_t i=0UL; i<threads.first; ++i )
459 const size_t row( i*rowsPerThread );
464 for(
size_t j=0UL; j<threads.second; ++j )
466 const size_t column( j*colsPerThread );
// Clamp the tile height to the rows remaining in lhs.
// NOTE(review): `n` is used below but its definition is not visible here.
471 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
474 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
475 TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished - confirm.
479 TheThreadBackend::wait();
// smpAddAssign overload selected when MT1 is a dense matrix and at least one of MT1 / MT2 is not
// SMP-assignable. It forwards directly to addAssign() on the unwrapped operands; no thread
// backend call is visible in this overload.
// NOTE(review): template parameter list and braces are incomplete in this listing.
504 template<
typename MT1
508 inline EnableIf_< And< IsDenseMatrix<MT1>
509 , Or< Not< IsSMPAssignable<MT1> >
510 , Not< IsSMPAssignable<MT2> > > > >
511 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
518 addAssign( ~lhs, ~rhs );
// smpAddAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain addAssign() call and a call to
// smpAddAssign_backend().
// NOTE(review): the condition that chooses between the two calls is not visible in this listing.
542 template<
typename MT1
546 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
547 smpAddAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
560 addAssign( ~lhs, ~rhs );
563 smpAddAssign_backend( ~lhs, ~rhs );
// Backend of the thread-based SMP subtraction assignment of a dense matrix (rhs) to a dense
// matrix (lhs). The work is cut into a threads.first x threads.second grid of submatrix tiles;
// each tile is handed to TheThreadBackend::scheduleSubAssign, followed by TheThreadBackend::wait().
// NOTE(review): this listing is missing lines (template parameters, braces, the definitions of
// `threads` and `n`) - confirm against the original header.
596 template<
typename MT1
600 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const DenseMatrix<MT2,SO2>& rhs )
606 typedef ElementType_<MT1> ET1;
607 typedef ElementType_<MT2> ET2;
608 typedef SubmatrixExprTrait_<MT1,aligned> AlignedTarget;
609 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// SIMDSIZE is taken from the SIMD trait of the left-hand side element type.
611 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<MT1> >
::size };
// SIMD handling requires both matrix types to report simdEnabled and identical element types.
613 const bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSame<ET1,ET2>::value );
614 const bool lhsAligned ( (~lhs).isAligned() );
615 const bool rhsAligned ( (~rhs).isAligned() );
// Rows per tile: ceiling of rows/threads.first, then (SIMD case only) raised to the next
// multiple of SIMDSIZE. The bit mask assumes SIMDSIZE is a power of two - TODO confirm.
619 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
620 const size_t equalShare1( (~rhs).
rows() / threads.first + addon1 );
621 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
622 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
// Columns per tile: same computation along the column dimension using threads.second.
624 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
625 const size_t equalShare2( (~rhs).
columns() / threads.second + addon2 );
626 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
627 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
// Walk the tile grid; tile (i,j) starts at element (i*rowsPerThread, j*colsPerThread).
629 for(
size_t i=0UL; i<threads.first; ++i )
631 const size_t row( i*rowsPerThread );
636 for(
size_t j=0UL; j<threads.second; ++j )
638 const size_t column( j*colsPerThread );
// Clamp the tile height to the rows remaining in lhs.
// NOTE(review): `n` is used below but its definition is not visible here.
643 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
// Both operands aligned: aligned views on both sides.
646 if( simdEnabled && lhsAligned && rhsAligned ) {
647 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
648 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Only lhs aligned: aligned target, unaligned source view.
650 else if( simdEnabled && lhsAligned ) {
651 AlignedTarget target( submatrix<aligned>( ~lhs,
row,
column, m, n ) );
652 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Only rhs aligned: unaligned target, aligned source view.
654 else if( simdEnabled && rhsAligned ) {
655 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
656 TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs,
row,
column, m, n ) );
// Remaining case (presumably the final `else`, whose line is not visible): unaligned views
// on both sides.
659 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
660 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished - confirm.
665 TheThreadBackend::wait();
// Backend of the thread-based SMP subtraction assignment of a sparse matrix (rhs) to a dense
// matrix (lhs). Tiles are always unaligned submatrix views; tile sizes are not SIMD-padded here.
// NOTE(review): this listing is missing lines (template parameters, braces, the definitions of
// `threads` and `n`) - confirm against the original header.
688 template<
typename MT1
692 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs,
const SparseMatrix<MT2,SO2>& rhs )
698 typedef SubmatrixExprTrait_<MT1,unaligned> UnalignedTarget;
// Rows per tile: ceiling of rows/threads.first.
702 const size_t addon1 ( ( ( (~rhs).
rows() % threads.first ) != 0UL )? 1UL : 0UL );
703 const size_t rowsPerThread( (~rhs).
rows() / threads.first + addon1 );
// Columns per tile: ceiling of columns/threads.second.
705 const size_t addon2 ( ( ( (~rhs).
columns() % threads.second ) != 0UL )? 1UL : 0UL );
706 const size_t colsPerThread( (~rhs).
columns() / threads.second + addon2 );
// Walk the threads.first x threads.second tile grid.
708 for(
size_t i=0UL; i<threads.first; ++i )
710 const size_t row( i*rowsPerThread );
715 for(
size_t j=0UL; j<threads.second; ++j )
717 const size_t column( j*colsPerThread );
// Clamp the tile height to the rows remaining in lhs.
// NOTE(review): `n` is used below but its definition is not visible here.
722 const size_t m(
min( rowsPerThread, (~lhs).
rows() -
row ) );
725 UnalignedTarget target( submatrix<unaligned>( ~lhs,
row,
column, m, n ) );
726 TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs,
row,
column, m, n ) );
// Presumably blocks until all scheduled tile operations have finished - confirm.
730 TheThreadBackend::wait();
// smpSubAssign overload selected when MT1 is a dense matrix and at least one of MT1 / MT2 is not
// SMP-assignable. It forwards directly to subAssign() on the unwrapped operands; no thread
// backend call is visible in this overload.
// NOTE(review): template parameter list and braces are incomplete in this listing.
755 template<
typename MT1
759 inline EnableIf_< And< IsDenseMatrix<MT1>
760 , Or< Not< IsSMPAssignable<MT1> >
761 , Not< IsSMPAssignable<MT2> > > > >
762 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
769 subAssign( ~lhs, ~rhs );
// smpSubAssign overload selected when MT1 is a dense matrix and both MT1 and MT2 are
// SMP-assignable. The visible body contains both a plain subAssign() call and a call to
// smpSubAssign_backend().
// NOTE(review): the condition that chooses between the two calls is not visible in this listing.
794 template<
typename MT1
798 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
799 smpSubAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
812 subAssign( ~lhs, ~rhs );
815 smpSubAssign_backend( ~lhs, ~rhs );
// smpMultAssign overload enabled whenever MT1 is a dense matrix. The visible body only forwards
// to multAssign() on the unwrapped operands; no thread backend call appears in this overload.
// NOTE(review): template parameter list and braces are incomplete in this listing.
848 template<
typename MT1
852 inline EnableIf_< IsDenseMatrix<MT1> >
853 smpMultAssign( Matrix<MT1,SO1>& lhs,
const Matrix<MT2,SO2>& rhs )
860 multAssign( ~lhs, ~rhs );
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
Header file for the IsSame and IsStrictlySame type traits.
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:126
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:330
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:126
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:314
Header file for the AreSIMDCombinable type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.