DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
47 #include <blaze/math/Functions.h>
57 #include <blaze/system/SMP.h>
58 #include <blaze/util/Assert.h>
59 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/mpl/And.h>
62 #include <blaze/util/mpl/Not.h>
63 #include <blaze/util/mpl/Or.h>
65 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // PLAIN ASSIGNMENT
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
94 template< typename MT1 // Type of the left-hand side dense matrix
95  , bool SO // Storage order of the left-hand side dense matrix
96  , typename MT2 > // Type of the right-hand side dense matrix
97 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
98 {
100 
101  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
102 
103  typedef typename MT1::ElementType ET1;
104  typedef typename MT2::ElementType ET2;
105  typedef IntrinsicTrait<typename MT1::ElementType> IT;
106  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
107  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
108 
109  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
110  const bool lhsAligned ( (~lhs).isAligned() );
111  const bool rhsAligned ( (~rhs).isAligned() );
112 
113  const size_t threads ( TheThreadBackend::size() );
114  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
115  const size_t equalShare ( (~lhs).rows() / threads + addon );
116  const size_t rest ( equalShare & ( IT::size - 1UL ) );
117  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
118 
119  for( size_t i=0UL; i<threads; ++i )
120  {
121  const size_t row( i*rowsPerThread );
122 
123  if( row >= (~lhs).rows() )
124  continue;
125 
126  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
127 
128  if( vectorizable && lhsAligned && rhsAligned ) {
129  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
130  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
131  }
132  else if( vectorizable && lhsAligned ) {
133  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
134  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
135  }
136  else if( vectorizable && rhsAligned ) {
137  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
138  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
139  }
140  else {
141  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
142  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
143  }
144  }
145 
146  TheThreadBackend::wait();
147 }
149 //*************************************************************************************************
150 
151 
152 //*************************************************************************************************
169 template< typename MT1 // Type of the left-hand side dense matrix
170  , bool SO // Storage order of the left-hand side dense matrix
171  , typename MT2 > // Type of the right-hand side dense matrix
172 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
173 {
175 
176  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
177 
178  typedef typename MT1::ElementType ET1;
179  typedef typename MT2::ElementType ET2;
180  typedef IntrinsicTrait<typename MT1::ElementType> IT;
181  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
182  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
183 
184  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
185  const bool lhsAligned ( (~lhs).isAligned() );
186  const bool rhsAligned ( (~rhs).isAligned() );
187 
188  const size_t threads ( TheThreadBackend::size() );
189  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
190  const size_t equalShare ( (~lhs).columns() / threads + addon );
191  const size_t rest ( equalShare & ( IT::size - 1UL ) );
192  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
193 
194  for( size_t i=0UL; i<threads; ++i )
195  {
196  const size_t column( i*colsPerThread );
197 
198  if( column >= (~lhs).columns() )
199  continue;
200 
201  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
202 
203  if( vectorizable && lhsAligned && rhsAligned ) {
204  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
205  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
206  }
207  else if( vectorizable && lhsAligned ) {
208  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
209  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
210  }
211  else if( vectorizable && rhsAligned ) {
212  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
213  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
214  }
215  else {
216  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
217  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
218  }
219  }
220 
221  TheThreadBackend::wait();
222 }
224 //*************************************************************************************************
225 
226 
227 //*************************************************************************************************
244 template< typename MT1 // Type of the left-hand side dense matrix
245  , bool SO // Storage order of the left-hand side dense matrix
246  , typename MT2 > // Type of the right-hand side sparse matrix
247 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
248 {
250 
251  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
252 
253  typedef typename MT1::ElementType ET1;
254  typedef typename MT2::ElementType ET2;
255  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
256 
257  const size_t threads ( TheThreadBackend::size() );
258  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
259  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
260 
261  for( size_t i=0UL; i<threads; ++i )
262  {
263  const size_t row( i*rowsPerThread );
264 
265  if( row >= (~lhs).rows() )
266  continue;
267 
268  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
269  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
270  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
271  }
272 
273  TheThreadBackend::wait();
274 }
276 //*************************************************************************************************
277 
278 
279 //*************************************************************************************************
296 template< typename MT1 // Type of the left-hand side dense matrix
297  , bool SO // Storage order of the left-hand side dense matrix
298  , typename MT2 > // Type of the right-hand side sparse matrix
299 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
300 {
302 
303  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
304 
305  typedef typename MT1::ElementType ET1;
306  typedef typename MT2::ElementType ET2;
307  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
308 
309  const size_t threads ( TheThreadBackend::size() );
310  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
311  const size_t colsPerThread( (~lhs).columns() / threads + addon );
312 
313  for( size_t i=0UL; i<threads; ++i )
314  {
315  const size_t column( i*colsPerThread );
316 
317  if( column >= (~lhs).columns() )
318  continue;
319 
320  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
321  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
322  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
323  }
324 
325  TheThreadBackend::wait();
326 }
328 //*************************************************************************************************
329 
330 
331 //*************************************************************************************************
/*!\brief Assignment of a matrix to a dense matrix for operands that are not both SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be assigned.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and at least one of \a MT1 and \a MT2
// is not SMP-assignable. It asserts that both operands have identical dimensions and then
// forwards the call to assign(); no work is scheduled on the thread backend here.
*/
349 template< typename MT1 // Type of the left-hand side dense matrix
350  , bool SO1 // Storage order of the left-hand side dense matrix
351  , typename MT2 // Type of the right-hand side matrix
352  , bool SO2 > // Storage order of the right-hand side matrix
353 inline typename EnableIf< And< IsDenseMatrix<MT1>
354  , Or< Not< IsSMPAssignable<MT1> >
355  , Not< IsSMPAssignable<MT2> > > > >::Type
356  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
357 {
359 
360  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
361  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
362 
363  assign( ~lhs, ~rhs );
364 }
366 //*************************************************************************************************
367 
368 
369 //*************************************************************************************************
/*!\brief SMP assignment of a matrix to a dense matrix when both operands are SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be assigned.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and both \a MT1 and \a MT2 are
// SMP-assignable. It asserts that both operands have identical dimensions. If a serial section
// is active or \a rhs reports that it cannot be SMP-assigned, the call is forwarded to assign();
// otherwise the work is handed to smpAssign_backend().
//
// NOTE(review): smpAssign_backend() asserts isParallelSectionActive(), but nothing visible in
// this function opens a parallel section, and the braces around the if/else have no visible
// owner. Presumably a parallel-section guard is missing from this listing directly in front of
// the opening brace -- confirm against the original header.
*/
387 template< typename MT1 // Type of the left-hand side dense matrix
388  , bool SO1 // Storage order of the left-hand side dense matrix
389  , typename MT2 // Type of the right-hand side matrix
390  , bool SO2 > // Storage order of the right-hand side matrix
391 inline typename EnableIf< And< IsDenseMatrix<MT1>
392  , IsSMPAssignable<MT1>
393  , IsSMPAssignable<MT2> > >::Type
394  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
395 {
397 
400 
401  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
402  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
403 
405  {
// Serial fallback: taken inside a serial section or when rhs declines SMP assignment
406  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
407  assign( ~lhs, ~rhs );
408  }
409  else {
410  smpAssign_backend( ~lhs, ~rhs );
411  }
412  }
413 }
415 //*************************************************************************************************
416 
417 
418 
419 
420 //=================================================================================================
421 //
422 // ADDITION ASSIGNMENT
423 //
424 //=================================================================================================
425 
426 //*************************************************************************************************
443 template< typename MT1 // Type of the left-hand side dense matrix
444  , bool SO // Storage order of the left-hand side dense matrix
445  , typename MT2 > // Type of the right-hand side dense matrix
446 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
447 {
449 
450  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
451 
452  typedef typename MT1::ElementType ET1;
453  typedef typename MT2::ElementType ET2;
454  typedef IntrinsicTrait<typename MT1::ElementType> IT;
455  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
456  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
457 
458  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
459  const bool lhsAligned ( (~lhs).isAligned() );
460  const bool rhsAligned ( (~rhs).isAligned() );
461 
462  const size_t threads ( TheThreadBackend::size() );
463  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
464  const size_t equalShare ( (~lhs).rows() / threads + addon );
465  const size_t rest ( equalShare & ( IT::size - 1UL ) );
466  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
467 
468  for( size_t i=0UL; i<threads; ++i )
469  {
470  const size_t row( i*rowsPerThread );
471 
472  if( row >= (~lhs).rows() )
473  continue;
474 
475  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
476 
477  if( vectorizable && lhsAligned && rhsAligned ) {
478  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
479  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
480  }
481  else if( vectorizable && lhsAligned ) {
482  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
483  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
484  }
485  else if( vectorizable && rhsAligned ) {
486  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
487  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
488  }
489  else {
490  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
491  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
492  }
493  }
494 
495  TheThreadBackend::wait();
496 }
498 //*************************************************************************************************
499 
500 
501 //*************************************************************************************************
518 template< typename MT1 // Type of the left-hand side dense matrix
519  , bool SO // Storage order of the left-hand side dense matrix
520  , typename MT2 > // Type of the right-hand side dense matrix
521 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
522 {
524 
525  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
526 
527  typedef typename MT1::ElementType ET1;
528  typedef typename MT2::ElementType ET2;
529  typedef IntrinsicTrait<typename MT1::ElementType> IT;
530  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
531  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
532 
533  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
534  const bool lhsAligned ( (~lhs).isAligned() );
535  const bool rhsAligned ( (~rhs).isAligned() );
536 
537  const size_t threads ( TheThreadBackend::size() );
538  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
539  const size_t equalShare ( (~lhs).columns() / threads + addon );
540  const size_t rest ( equalShare & ( IT::size - 1UL ) );
541  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
542 
543  for( size_t i=0UL; i<threads; ++i )
544  {
545  const size_t column( i*colsPerThread );
546 
547  if( column >= (~lhs).columns() )
548  continue;
549 
550  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
551 
552  if( vectorizable && lhsAligned && rhsAligned ) {
553  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
554  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
555  }
556  else if( vectorizable && lhsAligned ) {
557  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
558  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
559  }
560  else if( vectorizable && rhsAligned ) {
561  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
562  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
563  }
564  else {
565  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
566  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
567  }
568  }
569 
570  TheThreadBackend::wait();
571 }
573 //*************************************************************************************************
574 
575 
576 //*************************************************************************************************
593 template< typename MT1 // Type of the left-hand side dense matrix
594  , bool SO // Storage order of the left-hand side dense matrix
595  , typename MT2 > // Type of the right-hand side sparse matrix
596 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
597 {
599 
600  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
601 
602  typedef typename MT1::ElementType ET1;
603  typedef typename MT2::ElementType ET2;
604  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
605 
606  const size_t threads ( TheThreadBackend::size() );
607  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
608  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
609 
610  for( size_t i=0UL; i<threads; ++i )
611  {
612  const size_t row( i*rowsPerThread );
613 
614  if( row >= (~lhs).rows() )
615  continue;
616 
617  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
618  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
619  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
620  }
621 
622  TheThreadBackend::wait();
623 }
625 //*************************************************************************************************
626 
627 
628 //*************************************************************************************************
645 template< typename MT1 // Type of the left-hand side dense matrix
646  , bool SO // Storage order of the left-hand side dense matrix
647  , typename MT2 > // Type of the right-hand side sparse matrix
648 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
649 {
651 
652  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
653 
654  typedef typename MT1::ElementType ET1;
655  typedef typename MT2::ElementType ET2;
656  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
657 
658  const size_t threads ( TheThreadBackend::size() );
659  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
660  const size_t colsPerThread( (~lhs).columns() / threads + addon );
661 
662  for( size_t i=0UL; i<threads; ++i )
663  {
664  const size_t column( i*colsPerThread );
665 
666  if( column >= (~lhs).columns() )
667  continue;
668 
669  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
670  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
671  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
672  }
673 
674  TheThreadBackend::wait();
675 }
677 //*************************************************************************************************
678 
679 
680 //*************************************************************************************************
/*!\brief Addition assignment of a matrix to a dense matrix for operands that are not both SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be added.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and at least one of \a MT1 and \a MT2
// is not SMP-assignable. It asserts that both operands have identical dimensions and then
// forwards the call to addAssign(); no work is scheduled on the thread backend here.
*/
699 template< typename MT1 // Type of the left-hand side dense matrix
700  , bool SO1 // Storage order of the left-hand side dense matrix
701  , typename MT2 // Type of the right-hand side matrix
702  , bool SO2 > // Storage order of the right-hand side matrix
703 inline typename EnableIf< And< IsDenseMatrix<MT1>
704  , Or< Not< IsSMPAssignable<MT1> >
705  , Not< IsSMPAssignable<MT2> > > > >::Type
706  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
707 {
709 
710  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
711  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
712 
713  addAssign( ~lhs, ~rhs );
714 }
716 //*************************************************************************************************
717 
718 
719 //*************************************************************************************************
/*!\brief SMP addition assignment of a matrix to a dense matrix when both operands are SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be added.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and both \a MT1 and \a MT2 are
// SMP-assignable. It asserts that both operands have identical dimensions. If a serial section
// is active or \a rhs reports that it cannot be SMP-assigned, the call is forwarded to
// addAssign(); otherwise the work is handed to smpAddAssign_backend().
//
// NOTE(review): smpAddAssign_backend() asserts isParallelSectionActive(), but nothing visible in
// this function opens a parallel section, and the braces around the if/else have no visible
// owner. Presumably a parallel-section guard is missing from this listing directly in front of
// the opening brace -- confirm against the original header.
*/
737 template< typename MT1 // Type of the left-hand side dense matrix
738  , bool SO1 // Storage order of the left-hand side dense matrix
739  , typename MT2 // Type of the right-hand side matrix
740  , bool SO2 > // Storage order of the right-hand side matrix
741 inline typename EnableIf< And< IsDenseMatrix<MT1>
742  , IsSMPAssignable<MT1>
743  , IsSMPAssignable<MT2> > >::Type
744  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
745 {
747 
750 
751  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
752  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
753 
755  {
// Serial fallback: taken inside a serial section or when rhs declines SMP assignment
756  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
757  addAssign( ~lhs, ~rhs );
758  }
759  else {
760  smpAddAssign_backend( ~lhs, ~rhs );
761  }
762  }
763 }
765 //*************************************************************************************************
766 
767 
768 
769 
770 //=================================================================================================
771 //
772 // SUBTRACTION ASSIGNMENT
773 //
774 //=================================================================================================
775 
776 //*************************************************************************************************
793 template< typename MT1 // Type of the left-hand side dense matrix
794  , bool SO // Storage order of the left-hand side dense matrix
795  , typename MT2 > // Type of the right-hand side dense matrix
796 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
797 {
799 
800  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
801 
802  typedef typename MT1::ElementType ET1;
803  typedef typename MT2::ElementType ET2;
804  typedef IntrinsicTrait<typename MT1::ElementType> IT;
805  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
806  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
807 
808  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
809  const bool lhsAligned ( (~lhs).isAligned() );
810  const bool rhsAligned ( (~rhs).isAligned() );
811 
812  const size_t threads ( TheThreadBackend::size() );
813  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
814  const size_t equalShare ( (~lhs).rows() / threads + addon );
815  const size_t rest ( equalShare & ( IT::size - 1UL ) );
816  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
817 
818  for( size_t i=0UL; i<threads; ++i )
819  {
820  const size_t row( i*rowsPerThread );
821 
822  if( row >= (~lhs).rows() )
823  continue;
824 
825  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
826 
827  if( vectorizable && lhsAligned && rhsAligned ) {
828  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
829  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
830  }
831  else if( vectorizable && lhsAligned ) {
832  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
833  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
834  }
835  else if( vectorizable && rhsAligned ) {
836  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
837  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
838  }
839  else {
840  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
841  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
842  }
843  }
844 
845  TheThreadBackend::wait();
846 }
848 //*************************************************************************************************
849 
850 
851 //*************************************************************************************************
868 template< typename MT1 // Type of the left-hand side dense matrix
869  , bool SO // Storage order of the left-hand side dense matrix
870  , typename MT2 > // Type of the right-hand side dense matrix
871 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
872 {
874 
875  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
876 
877  typedef typename MT1::ElementType ET1;
878  typedef typename MT2::ElementType ET2;
879  typedef IntrinsicTrait<typename MT1::ElementType> IT;
880  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
881  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
882 
883  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
884  const bool lhsAligned ( (~lhs).isAligned() );
885  const bool rhsAligned ( (~rhs).isAligned() );
886 
887  const size_t threads ( TheThreadBackend::size() );
888  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
889  const size_t equalShare ( (~lhs).columns() / threads + addon );
890  const size_t rest ( equalShare & ( IT::size - 1UL ) );
891  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
892 
893  for( size_t i=0UL; i<threads; ++i )
894  {
895  const size_t column( i*colsPerThread );
896 
897  if( column >= (~lhs).columns() )
898  continue;
899 
900  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
901 
902  if( vectorizable && lhsAligned && rhsAligned ) {
903  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
904  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
905  }
906  else if( vectorizable && lhsAligned ) {
907  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
908  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
909  }
910  else if( vectorizable && rhsAligned ) {
911  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
912  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
913  }
914  else {
915  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
916  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
917  }
918  }
919 
920  TheThreadBackend::wait();
921 }
923 //*************************************************************************************************
924 
925 
926 //*************************************************************************************************
943 template< typename MT1 // Type of the left-hand side dense matrix
944  , bool SO // Storage order of the left-hand side dense matrix
945  , typename MT2 > // Type of the right-hand side sparse matrix
946 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
947 {
949 
950  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
951 
952  typedef typename MT1::ElementType ET1;
953  typedef typename MT2::ElementType ET2;
954  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
955 
956  const size_t threads ( TheThreadBackend::size() );
957  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
958  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
959 
960  for( size_t i=0UL; i<threads; ++i )
961  {
962  const size_t row( i*rowsPerThread );
963 
964  if( row >= (~lhs).rows() )
965  continue;
966 
967  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
968  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
969  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
970  }
971 
972  TheThreadBackend::wait();
973 }
975 //*************************************************************************************************
976 
977 
978 //*************************************************************************************************
995 template< typename MT1 // Type of the left-hand side dense matrix
996  , bool SO // Storage order of the left-hand side dense matrix
997  , typename MT2 > // Type of the right-hand side sparse matrix
998 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
999 {
1001 
1002  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
1003 
1004  typedef typename MT1::ElementType ET1;
1005  typedef typename MT2::ElementType ET2;
1006  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
1007 
1008  const size_t threads ( TheThreadBackend::size() );
1009  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
1010  const size_t colsPerThread( (~lhs).columns() / threads + addon );
1011 
1012  for( size_t i=0UL; i<threads; ++i )
1013  {
1014  const size_t column( i*colsPerThread );
1015 
1016  if( column >= (~lhs).columns() )
1017  continue;
1018 
1019  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
1020  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
1021  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
1022  }
1023 
1024  TheThreadBackend::wait();
1025 }
1027 //*************************************************************************************************
1028 
1029 
1030 //*************************************************************************************************
/*!\brief Subtraction assignment of a matrix to a dense matrix for operands that are not both SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be subtracted.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and at least one of \a MT1 and \a MT2
// is not SMP-assignable. It asserts that both operands have identical dimensions and then
// forwards the call to subAssign(); no work is scheduled on the thread backend here.
*/
1049 template< typename MT1 // Type of the left-hand side dense matrix
1050  , bool SO1 // Storage order of the left-hand side dense matrix
1051  , typename MT2 // Type of the right-hand side matrix
1052  , bool SO2 > // Storage order of the right-hand side matrix
1053 inline typename EnableIf< And< IsDenseMatrix<MT1>
1054  , Or< Not< IsSMPAssignable<MT1> >
1055  , Not< IsSMPAssignable<MT2> > > > >::Type
1056  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1057 {
1059 
1060  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1061  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1062 
1063  subAssign( ~lhs, ~rhs );
1064 }
1066 //*************************************************************************************************
1067 
1068 
1069 //*************************************************************************************************
/*!\brief SMP subtraction assignment of a matrix to a dense matrix when both operands are SMP-assignable.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be subtracted.
// \return void
//
// This overload is enabled when \a MT1 is a dense matrix and both \a MT1 and \a MT2 are
// SMP-assignable. It asserts that both operands have identical dimensions. If a serial section
// is active or \a rhs reports that it cannot be SMP-assigned, the call is forwarded to
// subAssign(); otherwise the work is handed to smpSubAssign_backend().
//
// NOTE(review): smpSubAssign_backend() asserts isParallelSectionActive(), but nothing visible in
// this function opens a parallel section, and the braces around the if/else have no visible
// owner. Presumably a parallel-section guard is missing from this listing directly in front of
// the opening brace -- confirm against the original header.
*/
1088 template< typename MT1 // Type of the left-hand side dense matrix
1089  , bool SO1 // Storage order of the left-hand side dense matrix
1090  , typename MT2 // Type of the right-hand side matrix
1091  , bool SO2 > // Storage order of the right-hand side matrix
1092 inline typename EnableIf< And< IsDenseMatrix<MT1>
1093  , IsSMPAssignable<MT1>
1094  , IsSMPAssignable<MT2> > >::Type
1095  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1096 {
1098 
1101 
1102  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1103  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1104 
1106  {
// Serial fallback: taken inside a serial section or when rhs declines SMP assignment
1107  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1108  subAssign( ~lhs, ~rhs );
1109  }
1110  else {
1111  smpSubAssign_backend( ~lhs, ~rhs );
1112  }
1113  }
1114 }
1116 //*************************************************************************************************
1117 
1118 
1119 
1120 
1121 //=================================================================================================
1122 //
1123 // MULTIPLICATION ASSIGNMENT
1124 //
1125 //=================================================================================================
1126 
1127 //*************************************************************************************************
/*!\brief Multiplication assignment of a matrix to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side matrix to be multiplied.
// \return void
//
// This overload is enabled whenever \a MT1 is a dense matrix. It asserts that both operands
// have the same number of rows and the same number of columns and then forwards the call to
// multAssign(); no work is scheduled on the thread backend here.
//
// NOTE(review): the assertions demand identical dimensions of \a lhs and \a rhs -- confirm that
// this is the intended precondition for the multiplication assignment.
*/
1144 template< typename MT1 // Type of the left-hand side dense matrix
1145  , bool SO1 // Storage order of the left-hand side matrix
1146  , typename MT2 // Type of the right-hand side matrix
1147  , bool SO2 > // Storage order of the right-hand side matrix
1148 inline typename EnableIf< IsDenseMatrix<MT1> >::Type
1149  smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1150 {
1152 
1153  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1154  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1155 
1156  multAssign( ~lhs, ~rhs );
1157 }
1159 //*************************************************************************************************
1160 
1161 
1162 
1163 
1164 //=================================================================================================
1165 //
1166 // COMPILE TIME CONSTRAINT
1167 //
1168 //=================================================================================================
1169 
1170 //*************************************************************************************************
// NOTE(review): this unnamed namespace is empty in this listing although the section banner
// announces a compile time constraint; presumably the constraint itself is missing here --
// confirm against the original header.
1172 namespace {
1173 
1175 
1176 }
1178 //*************************************************************************************************
1179 
1180 } // namespace blaze
1181 
1182 #endif
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename ColumnExprTrait< MT >::Type >::Type column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:103
Header file for the And class template.
Header file for the intrinsic trait.
Header file for the SparseMatrix base class.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:118
Header file for the complete DenseSubmatrix implementation.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
Header file for the Or class template.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Not class template.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:245
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename RowExprTrait< MT >::Type >::Type row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:103
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:212
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
Header file for the complete SparseSubmatrix implementation.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:212
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849