DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
#include <omp.h>
#include <blaze/math/Aliases.h>
#include <blaze/math/smp/ParallelSection.h>
#include <blaze/system/SMP.h>
#include <blaze/util/Assert.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/Types.h>
70 
71 
72 namespace blaze {
73 
74 //=================================================================================================
75 //
76 // OPENMP-BASED ASSIGNMENT KERNELS
77 //
78 //=================================================================================================
79 
80 //*************************************************************************************************
97 template< typename MT1 // Type of the left-hand side dense matrix
98  , bool SO1 // Storage order of the left-hand side dense matrix
99  , typename MT2 // Type of the right-hand side dense matrix
100  , bool SO2 // Storage order of the right-hand side dense matrix
101  , typename OP > // Type of the assignment operation
102 void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs, OP op )
103 {
105 
106  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
107 
108  using ET1 = ElementType_t<MT1>;
109  using ET2 = ElementType_t<MT2>;
110 
111  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
112  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<MT1> >::size );
113 
114  const bool lhsAligned( (~lhs).isAligned() );
115  const bool rhsAligned( (~rhs).isAligned() );
116 
117  const int threads( omp_get_num_threads() );
118  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
119 
120  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
121  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
122  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
123  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
124 
125  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
126  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
127  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
128  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
129 
130 #pragma omp for schedule(dynamic,1) nowait
131  for( int i=0; i<threads; ++i )
132  {
133  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
134  const size_t column( ( i % threadmap.second ) * colsPerThread );
135 
136  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
137  continue;
138 
139  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
140  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
141 
142  if( simdEnabled && lhsAligned && rhsAligned ) {
143  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
144  const auto source( submatrix<aligned>( ~rhs, row, column, m, n ) );
145  op( target, source );
146  }
147  else if( simdEnabled && lhsAligned ) {
148  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
149  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
150  op( target, source );
151  }
152  else if( simdEnabled && rhsAligned ) {
153  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
154  const auto source( submatrix<aligned>( ~rhs, row, column, m, n ) );
155  op( target, source );
156  }
157  else {
158  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
159  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
160  op( target, source );
161  }
162  }
163 }
165 //*************************************************************************************************
166 
167 
168 //*************************************************************************************************
185 template< typename MT1 // Type of the left-hand side dense matrix
186  , bool SO1 // Storage order of the left-hand side dense matrix
187  , typename MT2 // Type of the right-hand side sparse matrix
188  , bool SO2 // Storage order of the right-hand side sparse matrix
189  , typename OP > // Type of the assignment operation
190 void openmpAssign( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs, OP op )
191 {
193 
194  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
195 
196  const size_t threads( omp_get_num_threads() );
197  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
198 
199  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
200  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
201 
202  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
203  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
204 
205 #pragma omp for schedule(dynamic,1) nowait
206  for( size_t i=0; i<threads; ++i )
207  {
208  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
209  const size_t column( ( i % threadmap.second ) * colsPerThread );
210 
211  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
212  continue;
213 
214  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
215  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
216 
217  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
218  const auto source( submatrix<unaligned>( ~rhs, row, column, m, n ) );
219  op( target, source );
220  }
221 }
223 //*************************************************************************************************
224 
225 
226 
227 
228 //=================================================================================================
229 //
230 // PLAIN ASSIGNMENT
231 //
232 //=================================================================================================
233 
234 //*************************************************************************************************
252 template< typename MT1 // Type of the left-hand side dense matrix
253  , bool SO1 // Storage order of the left-hand side dense matrix
254  , typename MT2 // Type of the right-hand side matrix
255  , bool SO2 > // Storage order of the right-hand side matrix
256 inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
257  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
258 {
260 
261  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
262  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
263 
264  assign( ~lhs, ~rhs );
265 }
267 //*************************************************************************************************
268 
269 
270 //*************************************************************************************************
288 template< typename MT1 // Type of the left-hand side dense matrix
289  , bool SO1 // Storage order of the left-hand side dense matrix
290  , typename MT2 // Type of the right-hand side matrix
291  , bool SO2 > // Storage order of the right-hand side matrix
292 inline auto smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
293  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
294 {
296 
297  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
298  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
299 
300  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
301  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
302 
304  {
305  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
306  assign( ~lhs, ~rhs );
307  }
308  else {
309 #pragma omp parallel shared( lhs, rhs )
310  openmpAssign( ~lhs, ~rhs, Assign() );
311  }
312  }
313 }
315 //*************************************************************************************************
316 
317 
318 
319 
320 //=================================================================================================
321 //
322 // ADDITION ASSIGNMENT
323 //
324 //=================================================================================================
325 
326 //*************************************************************************************************
344 template< typename MT1 // Type of the left-hand side dense matrix
345  , bool SO1 // Storage order of the left-hand side dense matrix
346  , typename MT2 // Type of the right-hand side matrix
347  , bool SO2 > // Storage order of the right-hand side matrix
348 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
349  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
350 {
352 
353  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
354  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
355 
356  addAssign( ~lhs, ~rhs );
357 }
359 //*************************************************************************************************
360 
361 
362 //*************************************************************************************************
380 template< typename MT1 // Type of the left-hand side dense matrix
381  , bool SO1 // Storage order of the left-hand side dense matrix
382  , typename MT2 // Type of the right-hand side matrix
383  , bool SO2 > // Storage order of the right-hand side matrix
384 inline auto smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
385  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
386 {
388 
389  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
390  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
391 
392  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
393  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
394 
396  {
397  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
398  addAssign( ~lhs, ~rhs );
399  }
400  else {
401 #pragma omp parallel shared( lhs, rhs )
402  openmpAssign( ~lhs, ~rhs, AddAssign() );
403  }
404  }
405 }
407 //*************************************************************************************************
408 
409 
410 
411 
412 //=================================================================================================
413 //
414 // SUBTRACTION ASSIGNMENT
415 //
416 //=================================================================================================
417 
418 //*************************************************************************************************
436 template< typename MT1 // Type of the left-hand side dense matrix
437  , bool SO1 // Storage order of the left-hand side dense matrix
438  , typename MT2 // Type of the right-hand side matrix
439  , bool SO2 > // Storage order of the right-hand side matrix
440 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
441  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
442 {
444 
445  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
446  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
447 
448  subAssign( ~lhs, ~rhs );
449 }
451 //*************************************************************************************************
452 
453 
454 //*************************************************************************************************
472 template< typename MT1 // Type of the left-hand side dense matrix
473  , bool SO1 // Storage order of the left-hand side dense matrix
474  , typename MT2 // Type of the right-hand side matrix
475  , bool SO2 > // Storage order of the right-hand side matrix
476 inline auto smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
477  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
478 {
480 
481  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
482  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
483 
484  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
485  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
486 
488  {
489  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
490  subAssign( ~lhs, ~rhs );
491  }
492  else {
493 #pragma omp parallel shared( lhs, rhs )
494  openmpAssign( ~lhs, ~rhs, SubAssign() );
495  }
496  }
497 }
499 //*************************************************************************************************
500 
501 
502 
503 
504 //=================================================================================================
505 //
506 // SCHUR PRODUCT ASSIGNMENT
507 //
508 //=================================================================================================
509 
510 //*************************************************************************************************
528 template< typename MT1 // Type of the left-hand side dense matrix
529  , bool SO1 // Storage order of the left-hand side dense matrix
530  , typename MT2 // Type of the right-hand side matrix
531  , bool SO2 > // Storage order of the right-hand side matrix
532 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
533  -> EnableIf_t< IsDenseMatrix_v<MT1> && ( !IsSMPAssignable_v<MT1> || !IsSMPAssignable_v<MT2> ) >
534 {
536 
537  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
538  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
539 
540  schurAssign( ~lhs, ~rhs );
541 }
543 //*************************************************************************************************
544 
545 
546 //*************************************************************************************************
564 template< typename MT1 // Type of the left-hand side dense matrix
565  , bool SO1 // Storage order of the left-hand side dense matrix
566  , typename MT2 // Type of the right-hand side matrix
567  , bool SO2 > // Storage order of the right-hand side matrix
568 inline auto smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
569  -> EnableIf_t< IsDenseMatrix_v<MT1> && IsSMPAssignable_v<MT1> && IsSMPAssignable_v<MT2> >
570 {
572 
573  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT1> );
574  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<MT2> );
575 
576  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
577  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
578 
580  {
581  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
582  schurAssign( ~lhs, ~rhs );
583  }
584  else {
585 #pragma omp parallel shared( lhs, rhs )
586  openmpAssign( ~lhs, ~rhs, SchurAssign() );
587  }
588  }
589 }
591 //*************************************************************************************************
592 
593 
594 
595 
596 //=================================================================================================
597 //
598 // MULTIPLICATION ASSIGNMENT
599 //
600 //=================================================================================================
601 
602 //*************************************************************************************************
618 template< typename MT1 // Type of the left-hand side dense matrix
619  , bool SO1 // Storage order of the left-hand side matrix
620  , typename MT2 // Type of the right-hand side matrix
621  , bool SO2 > // Storage order of the right-hand side matrix
622 inline auto smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
623  -> EnableIf_t< IsDenseMatrix_v<MT1> >
624 {
626 
627  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
628  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
629 
630  multAssign( ~lhs, ~rhs );
631 }
633 //*************************************************************************************************
634 
635 
636 
637 
638 //=================================================================================================
639 //
640 // COMPILE TIME CONSTRAINT
641 //
642 //=================================================================================================
643 
644 //*************************************************************************************************
646 namespace {
647 
649 
650 }
652 //*************************************************************************************************
653 
654 } // namespace blaze
655 
656 #endif
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the alignment flag values.
Header file for basic type definitions.
Header file for the Assign functor.
Header file for the AddAssign functor.
Header file for the SIMD trait.
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
Header file for the DenseMatrix base class.
Header file for the serial section implementation.
Header file for the SchurAssign functor.
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
Header file for the MultAssign functor.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.