Blaze 3.9
DenseMatrix.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36#define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <omp.h>
44#include <blaze/math/Aliases.h>
58#include <blaze/system/SMP.h>
60#include <blaze/util/Assert.h>
61#include <blaze/util/EnableIf.h>
64#include <blaze/util/Types.h>
65
66
67namespace blaze {
68
69//=================================================================================================
70//
71// OPENMP-BASED ASSIGNMENT KERNELS
72//
73//=================================================================================================
74
75//*************************************************************************************************
92template< typename MT1 // Type of the left-hand side dense matrix
93 , bool SO1 // Storage order of the left-hand side dense matrix
94 , typename MT2 // Type of the right-hand side dense matrix
95 , bool SO2 // Storage order of the right-hand side dense matrix
96 , typename OP > // Type of the assignment operation
97void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs, OP op )
98{
100
101 BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
102
103 using ET1 = ElementType_t<MT1>;
104 using ET2 = ElementType_t<MT2>;
105
106 constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
107 constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::size );
108
109 const bool lhsAligned( (*lhs).isAligned() );
110 const bool rhsAligned( (*rhs).isAligned() );
111
112 const int threads( omp_get_num_threads() );
113 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
114
115 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
116 const size_t equalShare1( (*rhs).rows() / threadmap.first + addon1 );
117 const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
118 const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
119
120 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
121 const size_t equalShare2( (*rhs).columns() / threadmap.second + addon2 );
122 const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
123 const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
124
125#pragma omp for schedule(dynamic,1) nowait
126 for( int i=0; i<threads; ++i )
127 {
128 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
129 const size_t column( ( i % threadmap.second ) * colsPerThread );
130
131 if( row >= (*rhs).rows() || column >= (*rhs).columns() )
132 continue;
133
134 const size_t m( min( rowsPerThread, (*rhs).rows() - row ) );
135 const size_t n( min( colsPerThread, (*rhs).columns() - column ) );
136
137 if( simdEnabled && lhsAligned && rhsAligned ) {
138 auto target( submatrix<aligned>( *lhs, row, column, m, n ) );
139 const auto source( submatrix<aligned>( *rhs, row, column, m, n ) );
140 op( target, source );
141 }
142 else if( simdEnabled && lhsAligned ) {
143 auto target( submatrix<aligned>( *lhs, row, column, m, n ) );
144 const auto source( submatrix<unaligned>( *rhs, row, column, m, n ) );
145 op( target, source );
146 }
147 else if( simdEnabled && rhsAligned ) {
148 auto target( submatrix<unaligned>( *lhs, row, column, m, n ) );
149 const auto source( submatrix<aligned>( *rhs, row, column, m, n ) );
150 op( target, source );
151 }
152 else {
153 auto target( submatrix<unaligned>( *lhs, row, column, m, n ) );
154 const auto source( submatrix<unaligned>( *rhs, row, column, m, n ) );
155 op( target, source );
156 }
157 }
158}
160//*************************************************************************************************
161
162
163//*************************************************************************************************
180template< typename MT1 // Type of the left-hand side dense matrix
181 , bool SO1 // Storage order of the left-hand side dense matrix
182 , typename MT2 // Type of the right-hand side sparse matrix
183 , bool SO2 // Storage order of the right-hand side sparse matrix
184 , typename OP > // Type of the assignment operation
185void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs, OP op )
186{
188
189 BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
190
191 const size_t threads( omp_get_num_threads() );
192 const ThreadMapping threadmap( createThreadMapping( threads, *rhs ) );
193
194 const size_t addon1 ( ( ( (*rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
195 const size_t rowsPerThread( (*rhs).rows() / threadmap.first + addon1 );
196
197 const size_t addon2 ( ( ( (*rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
198 const size_t colsPerThread( (*rhs).columns() / threadmap.second + addon2 );
199
200#pragma omp for schedule(dynamic,1) nowait
201 for( size_t i=0; i<threads; ++i )
202 {
203 const size_t row ( ( i / threadmap.second ) * rowsPerThread );
204 const size_t column( ( i % threadmap.second ) * colsPerThread );
205
206 if( row >= (*rhs).rows() || column >= (*rhs).columns() )
207 continue;
208
209 const size_t m( min( rowsPerThread, (*lhs).rows() - row ) );
210 const size_t n( min( colsPerThread, (*lhs).columns() - column ) );
211
212 auto target( submatrix<unaligned>( *lhs, row, column, m, n ) );
213 const auto source( submatrix<unaligned>( *rhs, row, column, m, n ) );
214 op( target, source );
215 }
216}
218//*************************************************************************************************
219
220
221
222
223//=================================================================================================
224//
225// PLAIN ASSIGNMENT
226//
227//=================================================================================================
228
229//*************************************************************************************************
247template< typename MT1 // Type of the left-hand side dense matrix
248 , bool SO1 // Storage order of the left-hand side dense matrix
249 , typename MT2 // Type of the right-hand side matrix
250 , bool SO2 > // Storage order of the right-hand side matrix
251inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
252 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
253{
255
256 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
257 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
258
259 assign( *lhs, *rhs );
260}
262//*************************************************************************************************
263
264
265//*************************************************************************************************
283template< typename MT1 // Type of the left-hand side dense matrix
284 , bool SO1 // Storage order of the left-hand side dense matrix
285 , typename MT2 // Type of the right-hand side matrix
286 , bool SO2 > // Storage order of the right-hand side matrix
287inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
288 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
289{
291
294
295 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
296 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
297
299 {
300 if( isSerialSectionActive() || !(*rhs).canSMPAssign() ) {
301 assign( *lhs, *rhs );
302 }
303 else {
304#pragma omp parallel shared( lhs, rhs )
305 openmpAssign( *lhs, *rhs, []( auto& a, const auto& b ){ assign( a, b ); } );
306 }
307 }
308}
310//*************************************************************************************************
311
312
313
314
315//=================================================================================================
316//
317// ADDITION ASSIGNMENT
318//
319//=================================================================================================
320
321//*************************************************************************************************
339template< typename MT1 // Type of the left-hand side dense matrix
340 , bool SO1 // Storage order of the left-hand side dense matrix
341 , typename MT2 // Type of the right-hand side matrix
342 , bool SO2 > // Storage order of the right-hand side matrix
343inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
344 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
345{
347
348 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
349 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
350
351 addAssign( *lhs, *rhs );
352}
354//*************************************************************************************************
355
356
357//*************************************************************************************************
375template< typename MT1 // Type of the left-hand side dense matrix
376 , bool SO1 // Storage order of the left-hand side dense matrix
377 , typename MT2 // Type of the right-hand side matrix
378 , bool SO2 > // Storage order of the right-hand side matrix
379inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
380 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
381{
383
386
387 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
388 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
389
391 {
392 if( isSerialSectionActive() || !(*rhs).canSMPAssign() ) {
393 addAssign( *lhs, *rhs );
394 }
395 else {
396#pragma omp parallel shared( lhs, rhs )
397 openmpAssign( *lhs, *rhs, []( auto& a, const auto& b ){ addAssign( a, b ); } );
398 }
399 }
400}
402//*************************************************************************************************
403
404
405
406
407//=================================================================================================
408//
409// SUBTRACTION ASSIGNMENT
410//
411//=================================================================================================
412
413//*************************************************************************************************
431template< typename MT1 // Type of the left-hand side dense matrix
432 , bool SO1 // Storage order of the left-hand side dense matrix
433 , typename MT2 // Type of the right-hand side matrix
434 , bool SO2 > // Storage order of the right-hand side matrix
435inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
436 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
437{
439
440 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
441 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
442
443 subAssign( *lhs, *rhs );
444}
446//*************************************************************************************************
447
448
449//*************************************************************************************************
467template< typename MT1 // Type of the left-hand side dense matrix
468 , bool SO1 // Storage order of the left-hand side dense matrix
469 , typename MT2 // Type of the right-hand side matrix
470 , bool SO2 > // Storage order of the right-hand side matrix
471inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
472 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
473{
475
478
479 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
480 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
481
483 {
484 if( isSerialSectionActive() || !(*rhs).canSMPAssign() ) {
485 subAssign( *lhs, *rhs );
486 }
487 else {
488#pragma omp parallel shared( lhs, rhs )
489 openmpAssign( *lhs, *rhs, []( auto& a, const auto& b ){ subAssign( a, b ); } );
490 }
491 }
492}
494//*************************************************************************************************
495
496
497
498
499//=================================================================================================
500//
501// SCHUR PRODUCT ASSIGNMENT
502//
503//=================================================================================================
504
505//*************************************************************************************************
523template< typename MT1 // Type of the left-hand side dense matrix
524 , bool SO1 // Storage order of the left-hand side dense matrix
525 , typename MT2 // Type of the right-hand side matrix
526 , bool SO2 > // Storage order of the right-hand side matrix
527inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
528 -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
529{
531
532 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
533 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
534
535 schurAssign( *lhs, *rhs );
536}
538//*************************************************************************************************
539
540
541//*************************************************************************************************
559template< typename MT1 // Type of the left-hand side dense matrix
560 , bool SO1 // Storage order of the left-hand side dense matrix
561 , typename MT2 // Type of the right-hand side matrix
562 , bool SO2 > // Storage order of the right-hand side matrix
563inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
564 -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
565{
567
570
571 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
572 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
573
575 {
576 if( isSerialSectionActive() || !(*rhs).canSMPAssign() ) {
577 schurAssign( *lhs, *rhs );
578 }
579 else {
580#pragma omp parallel shared( lhs, rhs )
581 openmpAssign( *lhs, *rhs, []( auto& a, const auto& b ){ schurAssign( a, b ); } );
582 }
583 }
584}
586//*************************************************************************************************
587
588
589
590
591//=================================================================================================
592//
593// MULTIPLICATION ASSIGNMENT
594//
595//=================================================================================================
596
597//*************************************************************************************************
613template< typename MT1 // Type of the left-hand side dense matrix
614 , bool SO1 // Storage order of the left-hand side matrix
615 , typename MT2 // Type of the right-hand side matrix
616 , bool SO2 > // Storage order of the right-hand side matrix
617inline auto smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
618 -> EnableIf_t< IsDenseMatrix_v<MT1> >
619{
621
622 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == (*rhs).rows() , "Invalid number of rows" );
623 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).columns(), "Invalid number of columns" );
624
625 multAssign( *lhs, *rhs );
626}
628//*************************************************************************************************
629
630
631
632
633//=================================================================================================
634//
635// COMPILE TIME CONSTRAINT
636//
637//=================================================================================================
638
639//*************************************************************************************************
641namespace {
642
644
645}
647//*************************************************************************************************
648
649} // namespace blaze
650
651#endif
Header file for auxiliary alias declarations.
Header file for the alignment flag enumeration.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the IsDenseMatrix type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsSMPAssignable type trait.
Header file for the parallel section implementation.
Header file for the SIMD trait.
Constraint on the data type.
Header file for the serial section implementation.
Compile time assertion.
Header file for the SMP thread mapping functionality.
Header file for the DenseMatrix base class.
Header file for the SparseMatrix base class.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:137
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type.
Definition: SMPAssignable.h:81
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.
Definition: ParallelSection.h:254
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.
Definition: StaticAssert.h:112
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization.
Definition: SMP.h:68
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the matrix storage order types.
System settings for the shared-memory parallelization.
Header file for basic type definitions.
Header file for the generic min algorithm.
Header file for the implementation of the Submatrix view.