Blaze  3.6
DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
44 #include <blaze/math/Aliases.h>
58 #include <blaze/system/SMP.h>
60 #include <blaze/util/Assert.h>
61 #include <blaze/util/EnableIf.h>
64 #include <blaze/util/Types.h>
65 
66 
67 namespace blaze {
68 
69 //=================================================================================================
70 //
71 // OPENMP-BASED ASSIGNMENT KERNELS
72 //
73 //=================================================================================================
74 
75 //*************************************************************************************************
92 template< typename MT1 // Type of the left-hand side dense matrix
93  , bool SO1 // Storage order of the left-hand side dense matrix
94  , typename MT2 // Type of the right-hand side dense matrix
95  , bool SO2 // Storage order of the right-hand side dense matrix
96  , typename OP > // Type of the assignment operation
97 void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs, OP op )
98 {
100 
101  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
102 
103  using ET1 = ElementType_t<MT1>;
104  using ET2 = ElementType_t<MT2>;
105 
106  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
107  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::size );
108 
109  const bool lhsAligned( (~lhs).isAligned() );
110  const bool rhsAligned( (~rhs).isAligned() );
111 
112  const int threads( omp_get_num_threads() );
113  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
114 
115  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
116  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
117  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
118  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
119 
120  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
121  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
122  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
123  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
124 
125 #pragma omp for schedule(dynamic,1) nowait
126  for( int i=0; i<threads; ++i )
127  {
128  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
129  const size_t column( ( i % threadmap.second ) * colsPerThread );
130 
131  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
132  continue;
133 
134  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
135  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
136 
137  if( simdEnabled && lhsAligned && rhsAligned ) {
138  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
139  const auto source( submatrix<aligned>( ~rhs, row, column, m, n ) );
140  op( target, source );
141  }
142  else if( simdEnabled && lhsAligned ) {
143  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
144  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
145  op( target, source );
146  }
147  else if( simdEnabled && rhsAligned ) {
148  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
149  const auto source( submatrix<aligned>( ~rhs, row, column, m, n ) );
150  op( target, source );
151  }
152  else {
153  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
154  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
155  op( target, source );
156  }
157  }
158 }
160 //*************************************************************************************************
161 
162 
163 //*************************************************************************************************
180 template< typename MT1 // Type of the left-hand side dense matrix
181  , bool SO1 // Storage order of the left-hand side dense matrix
182  , typename MT2 // Type of the right-hand side sparse matrix
183  , bool SO2 // Storage order of the right-hand side sparse matrix
184  , typename OP > // Type of the assignment operation
185 void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs, OP op )
186 {
188 
189  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
190 
191  const size_t threads( omp_get_num_threads() );
192  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
193 
194  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
195  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
196 
197  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
198  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
199 
200 #pragma omp for schedule(dynamic,1) nowait
201  for( size_t i=0; i<threads; ++i )
202  {
203  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
204  const size_t column( ( i % threadmap.second ) * colsPerThread );
205 
206  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
207  continue;
208 
209  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
210  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
211 
212  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
213  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
214  op( target, source );
215  }
216 }
218 //*************************************************************************************************
219 
220 
221 
222 
223 //=================================================================================================
224 //
225 // PLAIN ASSIGNMENT
226 //
227 //=================================================================================================
228 
229 //*************************************************************************************************
247 template< typename MT1 // Type of the left-hand side dense matrix
248  , bool SO1 // Storage order of the left-hand side dense matrix
249  , typename MT2 // Type of the right-hand side matrix
250  , bool SO2 > // Storage order of the right-hand side matrix
251 inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
252  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
253 {
255 
256  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
257  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
258 
259  assign( ~lhs, ~rhs );
260 }
262 //*************************************************************************************************
263 
264 
265 //*************************************************************************************************
283 template< typename MT1 // Type of the left-hand side dense matrix
284  , bool SO1 // Storage order of the left-hand side dense matrix
285  , typename MT2 // Type of the right-hand side matrix
286  , bool SO2 > // Storage order of the right-hand side matrix
287 inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
288  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
289 {
291 
292  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
293  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
294 
295  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
296  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
297 
299  {
300  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
301  assign( ~lhs, ~rhs );
302  }
303  else {
304 #pragma omp parallel shared( lhs, rhs )
305  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ assign( a, b ); } );
306  }
307  }
308 }
310 //*************************************************************************************************
311 
312 
313 
314 
315 //=================================================================================================
316 //
317 // ADDITION ASSIGNMENT
318 //
319 //=================================================================================================
320 
321 //*************************************************************************************************
339 template< typename MT1 // Type of the left-hand side dense matrix
340  , bool SO1 // Storage order of the left-hand side dense matrix
341  , typename MT2 // Type of the right-hand side matrix
342  , bool SO2 > // Storage order of the right-hand side matrix
343 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
344  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
345 {
347 
348  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
349  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
350 
351  addAssign( ~lhs, ~rhs );
352 }
354 //*************************************************************************************************
355 
356 
357 //*************************************************************************************************
375 template< typename MT1 // Type of the left-hand side dense matrix
376  , bool SO1 // Storage order of the left-hand side dense matrix
377  , typename MT2 // Type of the right-hand side matrix
378  , bool SO2 > // Storage order of the right-hand side matrix
379 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
380  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
381 {
383 
384  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
385  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
386 
387  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
388  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
389 
391  {
392  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
393  addAssign( ~lhs, ~rhs );
394  }
395  else {
396 #pragma omp parallel shared( lhs, rhs )
397  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ addAssign( a, b ); } );
398  }
399  }
400 }
402 //*************************************************************************************************
403 
404 
405 
406 
407 //=================================================================================================
408 //
409 // SUBTRACTION ASSIGNMENT
410 //
411 //=================================================================================================
412 
413 //*************************************************************************************************
431 template< typename MT1 // Type of the left-hand side dense matrix
432  , bool SO1 // Storage order of the left-hand side dense matrix
433  , typename MT2 // Type of the right-hand side matrix
434  , bool SO2 > // Storage order of the right-hand side matrix
435 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
436  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
437 {
439 
440  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
441  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
442 
443  subAssign( ~lhs, ~rhs );
444 }
446 //*************************************************************************************************
447 
448 
449 //*************************************************************************************************
467 template< typename MT1 // Type of the left-hand side dense matrix
468  , bool SO1 // Storage order of the left-hand side dense matrix
469  , typename MT2 // Type of the right-hand side matrix
470  , bool SO2 > // Storage order of the right-hand side matrix
471 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
472  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
473 {
475 
476  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
477  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
478 
479  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
480  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
481 
483  {
484  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
485  subAssign( ~lhs, ~rhs );
486  }
487  else {
488 #pragma omp parallel shared( lhs, rhs )
489  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ subAssign( a, b ); } );
490  }
491  }
492 }
494 //*************************************************************************************************
495 
496 
497 
498 
499 //=================================================================================================
500 //
501 // SCHUR PRODUCT ASSIGNMENT
502 //
503 //=================================================================================================
504 
505 //*************************************************************************************************
523 template< typename MT1 // Type of the left-hand side dense matrix
524  , bool SO1 // Storage order of the left-hand side dense matrix
525  , typename MT2 // Type of the right-hand side matrix
526  , bool SO2 > // Storage order of the right-hand side matrix
527 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
528  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
529 {
531 
532  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
533  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
534 
535  schurAssign( ~lhs, ~rhs );
536 }
538 //*************************************************************************************************
539 
540 
541 //*************************************************************************************************
559 template< typename MT1 // Type of the left-hand side dense matrix
560  , bool SO1 // Storage order of the left-hand side dense matrix
561  , typename MT2 // Type of the right-hand side matrix
562  , bool SO2 > // Storage order of the right-hand side matrix
563 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
564  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
565 {
567 
568  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
569  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
570 
571  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
572  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
573 
575  {
576  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
577  schurAssign( ~lhs, ~rhs );
578  }
579  else {
580 #pragma omp parallel shared( lhs, rhs )
581  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ schurAssign( a, b ); } );
582  }
583  }
584 }
586 //*************************************************************************************************
587 
588 
589 
590 
591 //=================================================================================================
592 //
593 // MULTIPLICATION ASSIGNMENT
594 //
595 //=================================================================================================
596 
597 //*************************************************************************************************
613 template< typename MT1 // Type of the left-hand side dense matrix
614  , bool SO1 // Storage order of the left-hand side matrix
615  , typename MT2 // Type of the right-hand side matrix
616  , bool SO2 > // Storage order of the right-hand side matrix
617 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
618  -> EnableIf_t< IsDenseMatrix_v<MT1> >
619 {
621 
622  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
623  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
624 
625  multAssign( ~lhs, ~rhs );
626 }
628 //*************************************************************************************************
629 
630 
631 
632 
633 //=================================================================================================
634 //
635 // COMPILE TIME CONSTRAINT
636 //
637 //=================================================================================================
638 
639 //*************************************************************************************************
641 namespace {
642 
644 
645 }
647 //*************************************************************************************************
648 
649 } // namespace blaze
650 
651 #endif
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the alignment flag values.
Header file for basic type definitions.
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.