DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
57 #include <blaze/system/SMP.h>
59 #include <blaze/util/Assert.h>
60 #include <blaze/util/EnableIf.h>
62 #include <blaze/util/mpl/And.h>
63 #include <blaze/util/mpl/Not.h>
64 #include <blaze/util/mpl/Or.h>
66 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // PLAIN ASSIGNMENT
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
93 template< typename MT1 // Type of the left-hand side dense matrix
94  , bool SO1 // Storage order of the left-hand side dense matrix
95  , typename MT2 // Type of the right-hand side dense matrix
96  , bool SO2 > // Storage order of the right-hand side dense matrix
97 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
98 {
100 
101  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
102 
103  using ET1 = ElementType_<MT1>;
104  using ET2 = ElementType_<MT2>;
105 
106  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
107  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
108 
109  const bool lhsAligned( (~lhs).isAligned() );
110  const bool rhsAligned( (~rhs).isAligned() );
111 
112  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
113 
114  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
115  const size_t equalShare1( (~rhs).rows() / threads.first + addon1 );
116  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
117  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
118 
119  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
120  const size_t equalShare2( (~rhs).columns() / threads.second + addon2 );
121  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
122  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
123 
124  for( size_t i=0UL; i<threads.first; ++i )
125  {
126  const size_t row( i*rowsPerThread );
127 
128  if( row >= (~lhs).rows() )
129  continue;
130 
131  for( size_t j=0UL; j<threads.second; ++j )
132  {
133  const size_t column( j*colsPerThread );
134 
135  if( column >= (~rhs).columns() )
136  continue;
137 
138  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
139  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
140 
141  if( simdEnabled && lhsAligned && rhsAligned ) {
142  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
143  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
144  }
145  else if( simdEnabled && lhsAligned ) {
146  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
147  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
148  }
149  else if( simdEnabled && rhsAligned ) {
150  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
151  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
152  }
153  else {
154  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
155  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
156  }
157  }
158  }
159 
160  TheThreadBackend::wait();
161 }
163 //*************************************************************************************************
164 
165 
166 //*************************************************************************************************
183 template< typename MT1 // Type of the left-hand side dense matrix
184  , bool SO1 // Storage order of the left-hand side dense matrix
185  , typename MT2 // Type of the right-hand side sparse matrix
186  , bool SO2 > // Storage order of the right-hand side sparse matrix
187 void smpAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
188 {
190 
191  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
192 
193  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
194 
195  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
196  const size_t rowsPerThread( (~rhs).rows() / threads.first + addon1 );
197 
198  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
199  const size_t colsPerThread( (~rhs).columns() / threads.second + addon2 );
200 
201  for( size_t i=0UL; i<threads.first; ++i )
202  {
203  const size_t row( i*rowsPerThread );
204 
205  if( row >= (~lhs).rows() )
206  continue;
207 
208  for( size_t j=0UL; j<threads.second; ++j )
209  {
210  const size_t column( j*colsPerThread );
211 
212  if( column >= (~lhs).columns() )
213  continue;
214 
215  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
216  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
217 
218  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
219  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
220  }
221  }
222 
223  TheThreadBackend::wait();
224 }
226 //*************************************************************************************************
227 
228 
229 //*************************************************************************************************
247 template< typename MT1 // Type of the left-hand side dense matrix
248  , bool SO1 // Storage order of the left-hand side dense matrix
249  , typename MT2 // Type of the right-hand side matrix
250  , bool SO2 > // Storage order of the right-hand side matrix
251 inline EnableIf_< And< IsDenseMatrix<MT1>
252  , Or< Not< IsSMPAssignable<MT1> >
253  , Not< IsSMPAssignable<MT2> > > > >
254  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
255 {
257 
258  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
259  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
260 
261  assign( ~lhs, ~rhs );
262 }
264 //*************************************************************************************************
265 
266 
267 //*************************************************************************************************
285 template< typename MT1 // Type of the left-hand side dense matrix
286  , bool SO1 // Storage order of the left-hand side dense matrix
287  , typename MT2 // Type of the right-hand side matrix
288  , bool SO2 > // Storage order of the right-hand side matrix
289 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
290  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
291 {
293 
294  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
295  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
296 
297  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
298  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
299 
301  {
302  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
303  assign( ~lhs, ~rhs );
304  }
305  else {
306  smpAssign_backend( ~lhs, ~rhs );
307  }
308  }
309 }
311 //*************************************************************************************************
312 
313 
314 
315 
316 //=================================================================================================
317 //
318 // ADDITION ASSIGNMENT
319 //
320 //=================================================================================================
321 
322 //*************************************************************************************************
339 template< typename MT1 // Type of the left-hand side dense matrix
340  , bool SO1 // Storage order of the left-hand side dense matrix
341  , typename MT2 // Type of the right-hand side dense matrix
342  , bool SO2 > // Storage order fo the right-hand side dense matrix
343 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
344 {
346 
347  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
348 
349  using ET1 = ElementType_<MT1>;
350  using ET2 = ElementType_<MT2>;
351 
352  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
353  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
354 
355  const bool lhsAligned( (~lhs).isAligned() );
356  const bool rhsAligned( (~rhs).isAligned() );
357 
358  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
359 
360  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
361  const size_t equalShare1( (~rhs).rows() / threads.first + addon1 );
362  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
363  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
364 
365  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
366  const size_t equalShare2( (~rhs).columns() / threads.second + addon2 );
367  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
368  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
369 
370  for( size_t i=0UL; i<threads.first; ++i )
371  {
372  const size_t row( i*rowsPerThread );
373 
374  if( row >= (~lhs).rows() )
375  continue;
376 
377  for( size_t j=0UL; j<threads.second; ++j )
378  {
379  const size_t column( j*colsPerThread );
380 
381  if( column >= (~rhs).columns() )
382  continue;
383 
384  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
385  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
386 
387  if( simdEnabled && lhsAligned && rhsAligned ) {
388  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
389  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
390  }
391  else if( simdEnabled && lhsAligned ) {
392  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
393  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
394  }
395  else if( simdEnabled && rhsAligned ) {
396  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
397  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
398  }
399  else {
400  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
401  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
402  }
403  }
404  }
405 
406  TheThreadBackend::wait();
407 }
409 //*************************************************************************************************
410 
411 
412 //*************************************************************************************************
429 template< typename MT1 // Type of the left-hand side dense matrix
430  , bool SO1 // Storage order of the left-hand side dense matrix
431  , typename MT2 // Type of the right-hand side sparse matrix
432  , bool SO2 > // Storage order of the right-hand side sparse matrix
433 void smpAddAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
434 {
436 
437  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
438 
439  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
440 
441  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
442  const size_t rowsPerThread( (~rhs).rows() / threads.first + addon1 );
443 
444  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
445  const size_t colsPerThread( (~rhs).columns() / threads.second + addon2 );
446 
447  for( size_t i=0UL; i<threads.first; ++i )
448  {
449  const size_t row( i*rowsPerThread );
450 
451  if( row >= (~lhs).rows() )
452  continue;
453 
454  for( size_t j=0UL; j<threads.second; ++j )
455  {
456  const size_t column( j*colsPerThread );
457 
458  if( column >= (~lhs).columns() )
459  continue;
460 
461  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
462  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
463 
464  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
465  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
466  }
467  }
468 
469  TheThreadBackend::wait();
470 }
472 //*************************************************************************************************
473 
474 
475 //*************************************************************************************************
494 template< typename MT1 // Type of the left-hand side dense matrix
495  , bool SO1 // Storage order of the left-hand side dense matrix
496  , typename MT2 // Type of the right-hand side matrix
497  , bool SO2 > // Storage order of the right-hand side matrix
498 inline EnableIf_< And< IsDenseMatrix<MT1>
499  , Or< Not< IsSMPAssignable<MT1> >
500  , Not< IsSMPAssignable<MT2> > > > >
501  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
502 {
504 
505  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
506  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
507 
508  addAssign( ~lhs, ~rhs );
509 }
511 //*************************************************************************************************
512 
513 
514 //*************************************************************************************************
532 template< typename MT1 // Type of the left-hand side dense matrix
533  , bool SO1 // Storage order of the left-hand side dense matrix
534  , typename MT2 // Type of the right-hand side matrix
535  , bool SO2 > // Storage order of the right-hand side matrix
536 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
537  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
538 {
540 
541  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
542  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
543 
544  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
545  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
546 
548  {
549  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
550  addAssign( ~lhs, ~rhs );
551  }
552  else {
553  smpAddAssign_backend( ~lhs, ~rhs );
554  }
555  }
556 }
558 //*************************************************************************************************
559 
560 
561 
562 
563 //=================================================================================================
564 //
565 // SUBTRACTION ASSIGNMENT
566 //
567 //=================================================================================================
568 
569 //*************************************************************************************************
586 template< typename MT1 // Type of the left-hand side dense matrix
587  , bool SO1 // Storage order of the left-hand side dense matrix
588  , typename MT2 // Type of the right-hand side dense matrix
589  , bool SO2 > // Storage order of the right-hand side dense matrix
590 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
591 {
593 
594  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
595 
596  using ET1 = ElementType_<MT1>;
597  using ET2 = ElementType_<MT2>;
598 
599  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
600  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
601 
602  const bool lhsAligned( (~lhs).isAligned() );
603  const bool rhsAligned( (~rhs).isAligned() );
604 
605  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
606 
607  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
608  const size_t equalShare1( (~rhs).rows() / threads.first + addon1 );
609  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
610  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
611 
612  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
613  const size_t equalShare2( (~rhs).columns() / threads.second + addon2 );
614  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
615  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
616 
617  for( size_t i=0UL; i<threads.first; ++i )
618  {
619  const size_t row( i*rowsPerThread );
620 
621  if( row >= (~lhs).rows() )
622  continue;
623 
624  for( size_t j=0UL; j<threads.second; ++j )
625  {
626  const size_t column( j*colsPerThread );
627 
628  if( column >= (~rhs).columns() )
629  continue;
630 
631  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
632  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
633 
634  if( simdEnabled && lhsAligned && rhsAligned ) {
635  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
636  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
637  }
638  else if( simdEnabled && lhsAligned ) {
639  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
640  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
641  }
642  else if( simdEnabled && rhsAligned ) {
643  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
644  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
645  }
646  else {
647  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
648  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
649  }
650  }
651  }
652 
653  TheThreadBackend::wait();
654 }
656 //*************************************************************************************************
657 
658 
659 //*************************************************************************************************
676 template< typename MT1 // Type of the left-hand side dense matrix
677  , bool SO1 // Storage order of the left-hand side dense matrix
678  , typename MT2 // Type of the right-hand side sparse matrix
679  , bool SO2 > // Storage order of the right-hand side sparse matrix
680 void smpSubAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
681 {
683 
684  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
685 
686  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
687 
688  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
689  const size_t rowsPerThread( (~rhs).rows() / threads.first + addon1 );
690 
691  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
692  const size_t colsPerThread( (~rhs).columns() / threads.second + addon2 );
693 
694  for( size_t i=0UL; i<threads.first; ++i )
695  {
696  const size_t row( i*rowsPerThread );
697 
698  if( row >= (~lhs).rows() )
699  continue;
700 
701  for( size_t j=0UL; j<threads.second; ++j )
702  {
703  const size_t column( j*colsPerThread );
704 
705  if( column >= (~lhs).columns() )
706  continue;
707 
708  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
709  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
710 
711  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
712  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
713  }
714  }
715 
716  TheThreadBackend::wait();
717 }
719 //*************************************************************************************************
720 
721 
722 //*************************************************************************************************
741 template< typename MT1 // Type of the left-hand side dense matrix
742  , bool SO1 // Storage order of the left-hand side dense matrix
743  , typename MT2 // Type of the right-hand side matrix
744  , bool SO2 > // Storage order of the right-hand side matrix
745 inline EnableIf_< And< IsDenseMatrix<MT1>
746  , Or< Not< IsSMPAssignable<MT1> >
747  , Not< IsSMPAssignable<MT2> > > > >
748  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
749 {
751 
752  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
753  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
754 
755  subAssign( ~lhs, ~rhs );
756 }
758 //*************************************************************************************************
759 
760 
761 //*************************************************************************************************
780 template< typename MT1 // Type of the left-hand side dense matrix
781  , bool SO1 // Storage order of the left-hand side dense matrix
782  , typename MT2 // Type of the right-hand side matrix
783  , bool SO2 > // Storage order of the right-hand side matrix
784 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
785  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
786 {
788 
789  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
790  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
791 
792  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
793  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
794 
796  {
797  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
798  subAssign( ~lhs, ~rhs );
799  }
800  else {
801  smpSubAssign_backend( ~lhs, ~rhs );
802  }
803  }
804 }
806 //*************************************************************************************************
807 
808 
809 
810 
811 //=================================================================================================
812 //
813 // SCHUR PRODUCT ASSIGNMENT
814 //
815 //=================================================================================================
816 
817 //*************************************************************************************************
834 template< typename MT1 // Type of the left-hand side dense matrix
835  , bool SO1 // Storage order of the left-hand side dense matrix
836  , typename MT2 // Type of the right-hand side dense matrix
837  , bool SO2 > // Storage order fo the right-hand side dense matrix
838 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs, const DenseMatrix<MT2,SO2>& rhs )
839 {
841 
842  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
843 
844  using ET1 = ElementType_<MT1>;
845  using ET2 = ElementType_<MT2>;
846 
847  constexpr bool simdEnabled( MT1::simdEnabled && MT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
848  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<MT1> >::size );
849 
850  const bool lhsAligned( (~lhs).isAligned() );
851  const bool rhsAligned( (~rhs).isAligned() );
852 
853  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
854 
855  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
856  const size_t equalShare1( (~rhs).rows() / threads.first + addon1 );
857  const size_t rest1 ( equalShare1 & ( SIMDSIZE - 1UL ) );
858  const size_t rowsPerThread( ( simdEnabled && rest1 )?( equalShare1 - rest1 + SIMDSIZE ):( equalShare1 ) );
859 
860  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
861  const size_t equalShare2( (~rhs).columns() / threads.second + addon2 );
862  const size_t rest2 ( equalShare2 & ( SIMDSIZE - 1UL ) );
863  const size_t colsPerThread( ( simdEnabled && rest2 )?( equalShare2 - rest2 + SIMDSIZE ):( equalShare2 ) );
864 
865  for( size_t i=0UL; i<threads.first; ++i )
866  {
867  const size_t row( i*rowsPerThread );
868 
869  if( row >= (~lhs).rows() )
870  continue;
871 
872  for( size_t j=0UL; j<threads.second; ++j )
873  {
874  const size_t column( j*colsPerThread );
875 
876  if( column >= (~rhs).columns() )
877  continue;
878 
879  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
880  const size_t n( min( colsPerThread, (~rhs).columns() - column ) );
881 
882  if( simdEnabled && lhsAligned && rhsAligned ) {
883  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
884  TheThreadBackend::scheduleSchurAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
885  }
886  else if( simdEnabled && lhsAligned ) {
887  auto target( submatrix<aligned>( ~lhs, row, column, m, n ) );
888  TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
889  }
890  else if( simdEnabled && rhsAligned ) {
891  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
892  TheThreadBackend::scheduleSchurAssign( target, submatrix<aligned>( ~rhs, row, column, m, n ) );
893  }
894  else {
895  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
896  TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
897  }
898  }
899  }
900 
901  TheThreadBackend::wait();
902 }
904 //*************************************************************************************************
905 
906 
907 //*************************************************************************************************
924 template< typename MT1 // Type of the left-hand side dense matrix
925  , bool SO1 // Storage order of the left-hand side dense matrix
926  , typename MT2 // Type of the right-hand side sparse matrix
927  , bool SO2 > // Storage order of the right-hand side sparse matrix
928 void smpSchurAssign_backend( DenseMatrix<MT1,SO1>& lhs, const SparseMatrix<MT2,SO2>& rhs )
929 {
931 
932  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
933 
934  const ThreadMapping threads( createThreadMapping( TheThreadBackend::size(), ~rhs ) );
935 
936  const size_t addon1 ( ( ( (~rhs).rows() % threads.first ) != 0UL )? 1UL : 0UL );
937  const size_t rowsPerThread( (~rhs).rows() / threads.first + addon1 );
938 
939  const size_t addon2 ( ( ( (~rhs).columns() % threads.second ) != 0UL )? 1UL : 0UL );
940  const size_t colsPerThread( (~rhs).columns() / threads.second + addon2 );
941 
942  for( size_t i=0UL; i<threads.first; ++i )
943  {
944  const size_t row( i*rowsPerThread );
945 
946  if( row >= (~lhs).rows() )
947  continue;
948 
949  for( size_t j=0UL; j<threads.second; ++j )
950  {
951  const size_t column( j*colsPerThread );
952 
953  if( column >= (~lhs).columns() )
954  continue;
955 
956  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
957  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
958 
959  auto target( submatrix<unaligned>( ~lhs, row, column, m, n ) );
960  TheThreadBackend::scheduleSchurAssign( target, submatrix<unaligned>( ~rhs, row, column, m, n ) );
961  }
962  }
963 
964  TheThreadBackend::wait();
965 }
967 //*************************************************************************************************
968 
969 
970 //*************************************************************************************************
989 template< typename MT1 // Type of the left-hand side dense matrix
990  , bool SO1 // Storage order of the left-hand side dense matrix
991  , typename MT2 // Type of the right-hand side matrix
992  , bool SO2 > // Storage order of the right-hand side matrix
993 inline EnableIf_< And< IsDenseMatrix<MT1>
994  , Or< Not< IsSMPAssignable<MT1> >
995  , Not< IsSMPAssignable<MT2> > > > >
996  smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
997 {
999 
1000  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1001  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1002 
1003  schurAssign( ~lhs, ~rhs );
1004 }
1006 //*************************************************************************************************
1007 
1008 
1009 //*************************************************************************************************
1028 template< typename MT1 // Type of the left-hand side dense matrix
1029  , bool SO1 // Storage order of the left-hand side dense matrix
1030  , typename MT2 // Type of the right-hand side matrix
1031  , bool SO2 > // Storage order of the right-hand side matrix
1032 inline EnableIf_< And< IsDenseMatrix<MT1>, IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >
1033  smpSchurAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1034 {
1036 
1037  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT1> );
1038  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<MT2> );
1039 
1040  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1041  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1042 
1044  {
1045  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1046  schurAssign( ~lhs, ~rhs );
1047  }
1048  else {
1049  smpSchurAssign_backend( ~lhs, ~rhs );
1050  }
1051  }
1052 }
1054 //*************************************************************************************************
1055 
1056 
1057 
1058 
1059 //=================================================================================================
1060 //
1061 // MULTIPLICATION ASSIGNMENT
1062 //
1063 //=================================================================================================
1064 
1065 //*************************************************************************************************
1082 template< typename MT1 // Type of the left-hand side dense matrix
1083  , bool SO1 // Storage order of the left-hand side matrix
1084  , typename MT2 // Type of the right-hand side matrix
1085  , bool SO2 > // Storage order of the right-hand side matrix
1086 inline EnableIf_< IsDenseMatrix<MT1> >
1087  smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1088 {
1090 
1091  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1092  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1093 
1094  multAssign( ~lhs, ~rhs );
1095 }
1097 //*************************************************************************************************
1098 
1099 
1100 
1101 
1102 //=================================================================================================
1103 //
1104 // COMPILE TIME CONSTRAINT
1105 //
1106 //=================================================================================================
1107 
1108 //*************************************************************************************************
// NOTE(review): this unnamed namespace is empty in the listing. The embedded numbering skips a
// line inside it, so a declaration (presumably the compile time constraint announced by the
// section banner) appears to have been lost in extraction -- confirm against the upstream file.
1110 namespace {
1111 
1113 
1114 }
1116 //*************************************************************************************************
1117 
1118 } // namespace blaze
1119 
1120 #endif
Header file for the implementation of the Submatrix view.
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Header file for basic type definitions.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the SparseMatrix base class.
Header file for the SMP thread mapping functionality.
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the DenseMatrix base class.
Header file for the Not class template.
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:340
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:324
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.