DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
44 #include <blaze/math/Aliases.h>
58 #include <blaze/system/SMP.h>
60 #include <blaze/util/Assert.h>
61 #include <blaze/util/EnableIf.h>
63 #include <blaze/util/mpl/And.h>
64 #include <blaze/util/mpl/Not.h>
65 #include <blaze/util/mpl/Or.h>
67 #include <blaze/util/Types.h>
68 
69 
70 namespace blaze {
71 
72 //=================================================================================================
73 //
74 // PLAIN ASSIGNMENT
75 //
76 //=================================================================================================
77 
78 //*************************************************************************************************
94 template< typename MT1 // Type of the left-hand side dense matrix
95  , bool SO1 // Storage order of the left-hand side dense matrix
96  , typename MT2 // Type of the right-hand side dense matrix
97  , bool SO2 > // Storage order of the right-hand side dense matrix
98 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
99 {
101 
102  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
103 
104  using ET1 = ElementType_<MT1>;
105  using ET2 = ElementType_<MT2>;
106 
107  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
108  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
109 
110  const bool lhsAligned( (~lhs).isAligned() );
111  const bool rhsAligned( (~rhs).isAligned() );
112 
113  const int threads( omp_get_num_threads() );
114  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
115 
116  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
117  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
118  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
119  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
120 
121  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
122  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
123  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
124  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
125 
126 #pragma omp for schedule(dynamic,1) nowait
127  for( int i=0; i<threads; ++i )
128  {
129  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
130  const size_t column( ( i % threadmap.second ) * colsPerThread );
131 
132  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
133  continue;
134 
135  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
136  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
137 
138  if( simdEnabled && lhsAligned && rhsAligned ) {
139  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
140  assign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
141  }
142  else if( simdEnabled && lhsAligned ) {
143  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
144  assign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
145  }
146  else if( simdEnabled && rhsAligned ) {
147  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
148  assign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
149  }
150  else {
151  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
152  assign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
153  }
154  }
155 }
157 //*************************************************************************************************
158 
159 
160 //*************************************************************************************************
176 template< typename MT1 // Type of the left-hand side dense matrix
177  , bool SO1 // Storage order of the left-hand side dense matrix
178  , typename MT2 // Type of the right-hand side sparse matrix
179  , bool SO2 > // Storage order of the right-hand side sparse matrix
180 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
181 {
183 
184  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
185 
186  const size_t threads( omp_get_num_threads() );
187  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
188 
189  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
190  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
191 
192  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
193  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
194 
195 #pragma omp for schedule(dynamic,1) nowait
196  for( size_t i=0; i<threads; ++i )
197  {
198  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
199  const size_t column( ( i % threadmap.second ) * colsPerThread );
200 
201  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
202  continue;
203 
204  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
205  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
206 
207  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
208  assign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
209  }
210 }
212 //*************************************************************************************************
213 
214 
215 //*************************************************************************************************
233 template< typename MT1 // Type of the left-hand side dense matrix
234  , bool SO1 // Storage order of the left-hand side dense matrix
235  , typename MT2 // Type of the right-hand side matrix
236  , bool SO2 > // Storage order of the right-hand side matrix
237 inline EnableIf_< And< IsDenseMatrix<MT1>
238  , Or< Not< IsSMPAssignable<MT1> >
239  , Not< IsSMPAssignable<MT2> > > > >
240  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
241 {
243 
244  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
245  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
246 
247  assign( ~lhs, ~rhs );
248 }
250 //*************************************************************************************************
251 
252 
253 //*************************************************************************************************
271 template< typename MT1 // Type of the left-hand side dense matrix
272  , bool SO1 // Storage order of the left-hand side dense matrix
273  , typename MT2 // Type of the right-hand side matrix
274  , bool SO2 > // Storage order of the right-hand side matrix
275 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
276  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
277 {
279 
280  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
281  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
282 
283  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
284  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
285 
287  {
288  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
289  assign( ~lhs, ~rhs );
290  }
291  else {
292 #pragma omp parallel shared( lhs, rhs )
293  smpAssign_backend( ~lhs, ~rhs );
294  }
295  }
296 }
298 //*************************************************************************************************
299 
300 
301 
302 
303 //=================================================================================================
304 //
305 // ADDITION ASSIGNMENT
306 //
307 //=================================================================================================
308 
309 //*************************************************************************************************
325 template< typename MT1 // Type of the left-hand side dense matrix
326  , bool SO1 // Storage order of the left-hand side dense matrix
327  , typename MT2 // Type of the right-hand side dense matrix
328  , bool SO2 > // Storage order of the right-hand side dense matrix
329 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
330 {
332 
333  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
334 
335  using ET1 = ElementType_<MT1>;
336  using ET2 = ElementType_<MT2>;
337 
338  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
339  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
340 
341  const bool lhsAligned( (~lhs).isAligned() );
342  const bool rhsAligned( (~rhs).isAligned() );
343 
344  const int threads( omp_get_num_threads() );
345  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
346 
347  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
348  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
349  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
350  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
351 
352  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
353  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
354  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
355  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
356 
357 #pragma omp for schedule(dynamic,1) nowait
358  for( int i=0; i<threads; ++i )
359  {
360  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
361  const size_t column( ( i % threadmap.second ) * colsPerThread );
362 
363  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
364  continue;
365 
366  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
367  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
368 
369  if( simdEnabled && lhsAligned && rhsAligned ) {
370  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
371  addAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
372  }
373  else if( simdEnabled && lhsAligned ) {
374  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
375  addAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
376  }
377  else if( simdEnabled && rhsAligned ) {
378  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
379  addAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
380  }
381  else {
382  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
383  addAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
384  }
385  }
386 }
388 //*************************************************************************************************
389 
390 
391 //*************************************************************************************************
407 template< typename MT1 // Type of the left-hand side dense matrix
408  , bool SO1 // Storage order of the left-hand side dense matrix
409  , typename MT2 // Type of the right-hand side sparse matrix
410  , bool SO2 > // Storage order of the right-hand side sparse matrix
411 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
412 {
414 
415  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
416 
417  const size_t threads( omp_get_num_threads() );
418  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
419 
420  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
421  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
422 
423  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
424  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
425 
426 #pragma omp for schedule(dynamic,1) nowait
427  for( size_t i=0; i<threads; ++i )
428  {
429  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
430  const size_t column( ( i % threadmap.second ) * colsPerThread );
431 
432  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
433  continue;
434 
435  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
436  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
437 
438  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
439  addAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
440  }
441 }
443 //*************************************************************************************************
444 
445 
446 //*************************************************************************************************
464 template< typename MT1 // Type of the left-hand side dense matrix
465  , bool SO1 // Storage order of the left-hand side dense matrix
466  , typename MT2 // Type of the right-hand side matrix
467  , bool SO2 > // Storage order of the right-hand side matrix
468 inline EnableIf_< And< IsDenseMatrix<MT1>
469  , Or< Not< IsSMPAssignable<MT1> >
470  , Not< IsSMPAssignable<MT2> > > > >
471  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
472 {
474 
475  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
476  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
477 
478  addAssign( ~lhs, ~rhs );
479 }
481 //*************************************************************************************************
482 
483 
484 //*************************************************************************************************
502 template< typename MT1 // Type of the left-hand side dense matrix
503  , bool SO1 // Storage order of the left-hand side dense matrix
504  , typename MT2 // Type of the right-hand side matrix
505  , bool SO2 > // Storage order of the right-hand side matrix
506 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
507  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
508 {
510 
511  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
512  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
513 
514  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
515  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
516 
518  {
519  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
520  addAssign( ~lhs, ~rhs );
521  }
522  else {
523 #pragma omp parallel shared( lhs, rhs )
524  smpAddAssign_backend( ~lhs, ~rhs );
525  }
526  }
527 }
529 //*************************************************************************************************
530 
531 
532 
533 
534 //=================================================================================================
535 //
536 // SUBTRACTION ASSIGNMENT
537 //
538 //=================================================================================================
539 
540 //*************************************************************************************************
556 template< typename MT1 // Type of the left-hand side dense matrix
557  , bool SO1 // Storage order of the left-hand side dense matrix
558  , typename MT2 // Type of the right-hand side dense matrix
559  , bool SO2 > // Storage order of the right-hand side dense matrix
560 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
561 {
563 
564  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
565 
566  using ET1 = ElementType_<MT1>;
567  using ET2 = ElementType_<MT2>;
568 
569  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
570  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
571 
572  const bool lhsAligned( (~lhs).isAligned() );
573  const bool rhsAligned( (~rhs).isAligned() );
574 
575  const int threads( omp_get_num_threads() );
576  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
577 
578  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
579  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
580  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
581  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
582 
583  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
584  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
585  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
586  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
587 
588 #pragma omp for schedule(dynamic,1) nowait
589  for( int i=0; i<threads; ++i )
590  {
591  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
592  const size_t column( ( i % threadmap.second ) * colsPerThread );
593 
594  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
595  continue;
596 
597  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
598  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
599 
600  if( simdEnabled && lhsAligned && rhsAligned ) {
601  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
602  subAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
603  }
604  else if( simdEnabled && lhsAligned ) {
605  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
606  subAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
607  }
608  else if( simdEnabled && rhsAligned ) {
609  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
610  subAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
611  }
612  else {
613  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
614  subAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
615  }
616  }
617 }
619 //*************************************************************************************************
620 
621 
622 //*************************************************************************************************
639 template< typename MT1 // Type of the left-hand side dense matrix
640  , bool SO1 // Storage order of the left-hand side dense matrix
641  , typename MT2 // Type of the right-hand side sparse matrix
642  , bool SO2 > // Storage order of the right-hand side sparse matrix
643 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
644 {
646 
647  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
648 
649  const size_t threads( omp_get_num_threads() );
650  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
651 
652  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
653  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
654 
655  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
656  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
657 
658 #pragma omp for schedule(dynamic,1) nowait
659  for( size_t i=0; i<threads; ++i )
660  {
661  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
662  const size_t column( ( i % threadmap.second ) * colsPerThread );
663 
664  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
665  continue;
666 
667  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
668  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
669 
670  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
671  subAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
672  }
673 }
675 //*************************************************************************************************
676 
677 
678 //*************************************************************************************************
696 template< typename MT1 // Type of the left-hand side dense matrix
697  , bool SO1 // Storage order of the left-hand side dense matrix
698  , typename MT2 // Type of the right-hand side matrix
699  , bool SO2 > // Storage order of the right-hand side matrix
700 inline EnableIf_< And< IsDenseMatrix<MT1>
701  , Or< Not< IsSMPAssignable<MT1> >
702  , Not< IsSMPAssignable<MT2> > > > >
703  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
704 {
706 
707  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
708  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
709 
710  subAssign( ~lhs, ~rhs );
711 }
713 //*************************************************************************************************
714 
715 
716 //*************************************************************************************************
734 template< typename MT1 // Type of the left-hand side dense matrix
735  , bool SO1 // Storage order of the left-hand side dense matrix
736  , typename MT2 // Type of the right-hand side matrix
737  , bool SO2 > // Storage order of the right-hand side matrix
738 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
739  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
740 {
742 
743  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
744  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
745 
746  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
747  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
748 
750  {
751  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
752  subAssign( ~lhs, ~rhs );
753  }
754  else {
755 #pragma omp parallel shared( lhs, rhs )
756  smpSubAssign_backend( ~lhs, ~rhs );
757  }
758  }
759 }
761 //*************************************************************************************************
762 
763 
764 
765 
766 //=================================================================================================
767 //
768 // SCHUR PRODUCT ASSIGNMENT
769 //
770 //=================================================================================================
771 
772 //*************************************************************************************************
789 template< typename MT1 // Type of the left-hand side dense matrix
790  , bool SO1 // Storage order of the left-hand side dense matrix
791  , typename MT2 // Type of the right-hand side dense matrix
792  , bool SO2 > // Storage order of the right-hand side dense matrix
793 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
794 {
796 
797  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
798 
799  using ET1 = ElementType_<MT1>;
800  using ET2 = ElementType_<MT2>;
801 
802  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
803  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
804 
805  const bool lhsAligned( (~lhs).isAligned() );
806  const bool rhsAligned( (~rhs).isAligned() );
807 
808  const int threads( omp_get_num_threads() );
809  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
810 
811  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
812  const size_t equalShare1( (~rhs).rows() / threadmap.first + addon1 );
813  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
814  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
815 
816  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
817  const size_t equalShare2( (~rhs).columns() / threadmap.second + addon2 );
818  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
819  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
820 
821 #pragma omp for schedule(dynamic,1) nowait
822  for( int i=0; i<threads; ++i )
823  {
824  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
825  const size_t column( ( i % threadmap.second ) * colsPerThread );
826 
827  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
828  continue;
829 
830  const size_t m( min( rowsPerThread, (~rhs).rows() - row ) );
831  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
832 
833  if( simdEnabled && lhsAligned && rhsAligned ) {
834  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
835  schurAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
836  }
837  else if( simdEnabled && lhsAligned ) {
838  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
839  schurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
840  }
841  else if( simdEnabled && rhsAligned ) {
842  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
843  schurAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
844  }
845  else {
846  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
847  schurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
848  }
849  }
850 }
852 //*************************************************************************************************
853 
854 
855 //*************************************************************************************************
872 template< typename MT1 // Type of the left-hand side dense matrix
873  , bool SO1 // Storage order of the left-hand side dense matrix
874  , typename MT2 // Type of the right-hand side sparse matrix
875  , bool SO2 > // Storage order of the right-hand side sparse matrix
876 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
877 {
879 
880  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
881 
882  const size_t threads( omp_get_num_threads() );
883  const ThreadMapping threadmap( createThreadMapping( threads, ~rhs ) );
884 
885  const size_t addon1 ( ( ( (~rhs).rows() % threadmap.first ) != 0UL )? 1UL : 0UL );
886  const size_t rowsPerThread( (~rhs).rows() / threadmap.first + addon1 );
887 
888  const size_t addon2 ( ( ( (~rhs).columns() % threadmap.second ) != 0UL )? 1UL : 0UL );
889  const size_t colsPerThread( (~rhs).columns() / threadmap.second + addon2 );
890 
891 #pragma omp for schedule(dynamic,1) nowait
892  for( size_t i=0; i<threads; ++i )
893  {
894  const size_t row ( ( i / threadmap.second ) * rowsPerThread );
895  const size_t column( ( i % threadmap.second ) * colsPerThread );
896 
897  if( row >= (~rhs).rows() || column >= (~rhs).columns() )
898  continue;
899 
900  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
901  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
902 
903  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
904  schurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
905  }
906 }
908 //*************************************************************************************************
909 
910 
911 //*************************************************************************************************
929 template< typename MT1 // Type of the left-hand side dense matrix
930  , bool SO1 // Storage order of the left-hand side dense matrix
931  , typename MT2 // Type of the right-hand side matrix
932  , bool SO2 > // Storage order of the right-hand side matrix
933 inline EnableIf_< And< IsDenseMatrix<MT1>
934  , Or< Not< IsSMPAssignable<MT1> >
935  , Not< IsSMPAssignable<MT2> > > > >
936  smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
937 {
939 
940  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
941  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
942 
943  schurAssign( ~lhs, ~rhs );
944 }
946 //*************************************************************************************************
947 
948 
949 //*************************************************************************************************
967 template< typename MT1 // Type of the left-hand side dense matrix
968  , bool SO1 // Storage order of the left-hand side dense matrix
969  , typename MT2 // Type of the right-hand side matrix
970  , bool SO2 > // Storage order of the right-hand side matrix
971 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
972  smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
973 {
975 
976  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
977  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
978 
979  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
980  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
981 
983  {
984  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
985  schurAssign( ~lhs, ~rhs );
986  }
987  else {
988 #pragma omp parallel shared( lhs, rhs )
989  smpSchurAssign_backend( ~lhs, ~rhs );
990  }
991  }
992 }
994 //*************************************************************************************************
995 
996 
997 
998 
999 //=================================================================================================
1000 //
1001 // MULTIPLICATION ASSIGNMENT
1002 //
1003 //=================================================================================================
1004 
1005 //*************************************************************************************************
1021 template< typename MT1 // Type of the left-hand side dense matrix
1022  , bool SO1 // Storage order of the left-hand side matrix
1023  , typename MT2 // Type of the right-hand side matrix
1024  , bool SO2 > // Storage order of the right-hand side matrix
1025 inline EnableIf_< IsDenseMatrix<MT1> >
1026  smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1027 {
1029 
1030  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1031  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1032 
1033  multAssign( ~lhs, ~rhs );
1034 }
1036 //*************************************************************************************************
1037 
1038 
1039 
1040 
1041 //=================================================================================================
1042 //
1043 // COMPILE TIME CONSTRAINT
1044 //
1045 //=================================================================================================
1046 
1047 //*************************************************************************************************
1049 namespace {
1050 
1052 
1053 }
1055 //*************************************************************************************************
1056 
1057 } // namespace blaze
1058 
1059 #endif
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for the alignment flag values.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:340
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:324
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.