All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
47 #include <blaze/math/Functions.h>
56 #include <blaze/system/SMP.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/DisableIf.h>
59 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/mpl/And.h>
64 
65 
66 namespace blaze {
67 
68 //=================================================================================================
69 //
70 // PLAIN ASSIGNMENT
71 //
72 //=================================================================================================
73 
74 //*************************************************************************************************
91 template< typename MT1 // Type of the left-hand side dense matrix
92  , bool SO // Storage order of the left-hand side dense matrix
93  , typename MT2 > // Type of the right-hand side dense matrix
94 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
95 {
97 
98  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
99 
100  typedef typename MT1::ElementType ET1;
101  typedef typename MT2::ElementType ET2;
102  typedef IntrinsicTrait<typename MT1::ElementType> IT;
103  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
104  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
105 
106  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
107  const bool lhsAligned ( (~lhs).isAligned() );
108  const bool rhsAligned ( (~rhs).isAligned() );
109 
110  const size_t threads ( TheThreadBackend::size() );
111  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
112  const size_t equalShare ( (~lhs).rows() / threads + addon );
113  const size_t rest ( equalShare & ( IT::size - 1UL ) );
114  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
115 
116  for( size_t i=0UL; i<threads; ++i )
117  {
118  const size_t row( i*rowsPerThread );
119 
120  if( row >= (~lhs).rows() )
121  continue;
122 
123  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
124 
125  if( vectorizable && lhsAligned && rhsAligned ) {
126  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
127  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
128  }
129  else if( vectorizable && lhsAligned ) {
130  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
131  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
132  }
133  else if( vectorizable && rhsAligned ) {
134  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
135  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
136  }
137  else {
138  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
139  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
140  }
141  }
142 
143  TheThreadBackend::wait();
144 }
146 //*************************************************************************************************
147 
148 
149 //*************************************************************************************************
166 template< typename MT1 // Type of the left-hand side dense matrix
167  , bool SO // Storage order of the left-hand side dense matrix
168  , typename MT2 > // Type of the right-hand side dense matrix
169 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
170 {
172 
173  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
174 
175  typedef typename MT1::ElementType ET1;
176  typedef typename MT2::ElementType ET2;
177  typedef IntrinsicTrait<typename MT1::ElementType> IT;
178  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
179  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
180 
181  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
182  const bool lhsAligned ( (~lhs).isAligned() );
183  const bool rhsAligned ( (~rhs).isAligned() );
184 
185  const size_t threads ( TheThreadBackend::size() );
186  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
187  const size_t equalShare ( (~lhs).columns() / threads + addon );
188  const size_t rest ( equalShare & ( IT::size - 1UL ) );
189  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
190 
191  for( size_t i=0UL; i<threads; ++i )
192  {
193  const size_t column( i*colsPerThread );
194 
195  if( column >= (~lhs).columns() )
196  continue;
197 
198  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
199 
200  if( vectorizable && lhsAligned && rhsAligned ) {
201  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
202  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
203  }
204  else if( vectorizable && lhsAligned ) {
205  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
206  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
207  }
208  else if( vectorizable && rhsAligned ) {
209  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
210  TheThreadBackend::scheduleAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
211  }
212  else {
213  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
214  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
215  }
216  }
217 
218  TheThreadBackend::wait();
219 }
221 //*************************************************************************************************
222 
223 
224 //*************************************************************************************************
241 template< typename MT1 // Type of the left-hand side dense matrix
242  , bool SO // Storage order of the left-hand side dense matrix
243  , typename MT2 > // Type of the right-hand side sparse matrix
244 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
245 {
247 
248  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
249 
250  typedef typename MT1::ElementType ET1;
251  typedef typename MT2::ElementType ET2;
252  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
253 
254  const size_t threads ( TheThreadBackend::size() );
255  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
256  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
257 
258  for( size_t i=0UL; i<threads; ++i )
259  {
260  const size_t row( i*rowsPerThread );
261 
262  if( row >= (~lhs).rows() )
263  continue;
264 
265  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
266  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
267  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
268  }
269 
270  TheThreadBackend::wait();
271 }
273 //*************************************************************************************************
274 
275 
276 //*************************************************************************************************
293 template< typename MT1 // Type of the left-hand side dense matrix
294  , bool SO // Storage order of the left-hand side dense matrix
295  , typename MT2 > // Type of the right-hand side sparse matrix
296 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
297 {
299 
300  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
301 
302  typedef typename MT1::ElementType ET1;
303  typedef typename MT2::ElementType ET2;
304  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
305 
306  const size_t threads ( TheThreadBackend::size() );
307  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
308  const size_t colsPerThread( (~lhs).columns() / threads + addon );
309 
310  for( size_t i=0UL; i<threads; ++i )
311  {
312  const size_t column( i*colsPerThread );
313 
314  if( column >= (~lhs).columns() )
315  continue;
316 
317  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
318  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
319  TheThreadBackend::scheduleAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
320  }
321 
322  TheThreadBackend::wait();
323 }
325 //*************************************************************************************************
326 
327 
328 //*************************************************************************************************
346 template< typename MT1 // Type of the left-hand side dense matrix
347  , bool SO1 // Storage order of the left-hand side matrix
348  , typename MT2 // Type of the right-hand side matrix
349  , bool SO2 > // Storage order of the right-hand side matrix
350 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
351  smpAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
352 {
354 
355  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
356  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
357 
358  assign( ~lhs, ~rhs );
359 }
361 //*************************************************************************************************
362 
363 
364 //*************************************************************************************************
382 template< typename MT1 // Type of the left-hand side dense matrix
383  , bool SO1 // Storage order of the left-hand side matrix
384  , typename MT2 // Type of the right-hand side matrix
385  , bool SO2 > // Storage order of the right-hand side matrix
386 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
387  smpAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
388 {
390 
393 
394  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
395  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
396 
398  {
399  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
400  assign( ~lhs, ~rhs );
401  }
402  else {
403  smpAssign_backend( ~lhs, ~rhs );
404  }
405  }
406 }
408 //*************************************************************************************************
409 
410 
411 
412 
413 //=================================================================================================
414 //
415 // ADDITION ASSIGNMENT
416 //
417 //=================================================================================================
418 
419 //*************************************************************************************************
436 template< typename MT1 // Type of the left-hand side dense matrix
437  , bool SO // Storage order of the left-hand side dense matrix
438  , typename MT2 > // Type of the right-hand side dense matrix
439 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
440 {
442 
443  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
444 
445  typedef typename MT1::ElementType ET1;
446  typedef typename MT2::ElementType ET2;
447  typedef IntrinsicTrait<typename MT1::ElementType> IT;
448  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
449  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
450 
451  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
452  const bool lhsAligned ( (~lhs).isAligned() );
453  const bool rhsAligned ( (~rhs).isAligned() );
454 
455  const size_t threads ( TheThreadBackend::size() );
456  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
457  const size_t equalShare ( (~lhs).rows() / threads + addon );
458  const size_t rest ( equalShare & ( IT::size - 1UL ) );
459  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
460 
461  for( size_t i=0UL; i<threads; ++i )
462  {
463  const size_t row( i*rowsPerThread );
464 
465  if( row >= (~lhs).rows() )
466  continue;
467 
468  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
469 
470  if( vectorizable && lhsAligned && rhsAligned ) {
471  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
472  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
473  }
474  else if( vectorizable && lhsAligned ) {
475  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
476  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
477  }
478  else if( vectorizable && rhsAligned ) {
479  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
480  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
481  }
482  else {
483  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
484  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
485  }
486  }
487 
488  TheThreadBackend::wait();
489 }
491 //*************************************************************************************************
492 
493 
494 //*************************************************************************************************
511 template< typename MT1 // Type of the left-hand side dense matrix
512  , bool SO // Storage order of the left-hand side dense matrix
513  , typename MT2 > // Type of the right-hand side dense matrix
514 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
515 {
517 
518  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
519 
520  typedef typename MT1::ElementType ET1;
521  typedef typename MT2::ElementType ET2;
522  typedef IntrinsicTrait<typename MT1::ElementType> IT;
523  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
524  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
525 
526  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
527  const bool lhsAligned ( (~lhs).isAligned() );
528  const bool rhsAligned ( (~rhs).isAligned() );
529 
530  const size_t threads ( TheThreadBackend::size() );
531  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
532  const size_t equalShare ( (~lhs).columns() / threads + addon );
533  const size_t rest ( equalShare & ( IT::size - 1UL ) );
534  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
535 
536  for( size_t i=0UL; i<threads; ++i )
537  {
538  const size_t column( i*colsPerThread );
539 
540  if( column >= (~lhs).columns() )
541  continue;
542 
543  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
544 
545  if( vectorizable && lhsAligned && rhsAligned ) {
546  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
547  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
548  }
549  else if( vectorizable && lhsAligned ) {
550  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
551  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
552  }
553  else if( vectorizable && rhsAligned ) {
554  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
555  TheThreadBackend::scheduleAddAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
556  }
557  else {
558  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
559  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
560  }
561  }
562 
563  TheThreadBackend::wait();
564 }
566 //*************************************************************************************************
567 
568 
569 //*************************************************************************************************
586 template< typename MT1 // Type of the left-hand side dense matrix
587  , bool SO // Storage order of the left-hand side dense matrix
588  , typename MT2 > // Type of the right-hand side sparse matrix
589 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
590 {
592 
593  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
594 
595  typedef typename MT1::ElementType ET1;
596  typedef typename MT2::ElementType ET2;
597  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
598 
599  const size_t threads ( TheThreadBackend::size() );
600  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
601  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
602 
603  for( size_t i=0UL; i<threads; ++i )
604  {
605  const size_t row( i*rowsPerThread );
606 
607  if( row >= (~lhs).rows() )
608  continue;
609 
610  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
611  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
612  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
613  }
614 
615  TheThreadBackend::wait();
616 }
618 //*************************************************************************************************
619 
620 
621 //*************************************************************************************************
638 template< typename MT1 // Type of the left-hand side dense matrix
639  , bool SO // Storage order of the left-hand side dense matrix
640  , typename MT2 > // Type of the right-hand side sparse matrix
641 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
642 {
644 
645  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
646 
647  typedef typename MT1::ElementType ET1;
648  typedef typename MT2::ElementType ET2;
649  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
650 
651  const size_t threads ( TheThreadBackend::size() );
652  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
653  const size_t colsPerThread( (~lhs).columns() / threads + addon );
654 
655  for( size_t i=0UL; i<threads; ++i )
656  {
657  const size_t column( i*colsPerThread );
658 
659  if( column >= (~lhs).columns() )
660  continue;
661 
662  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
663  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
664  TheThreadBackend::scheduleAddAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
665  }
666 
667  TheThreadBackend::wait();
668 }
670 //*************************************************************************************************
671 
672 
673 //*************************************************************************************************
692 template< typename MT1 // Type of the left-hand side dense matrix
693  , bool SO1 // Storage order of the left-hand side matrix
694  , typename MT2 // Type of the right-hand side matrix
695  , bool SO2 > // Storage order of the right-hand side matrix
696 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
697  smpAddAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
698 {
700 
701  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
702  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
703 
704  addAssign( ~lhs, ~rhs );
705 }
707 //*************************************************************************************************
708 
709 
710 //*************************************************************************************************
728 template< typename MT1 // Type of the left-hand side dense matrix
729  , bool SO1 // Storage order of the left-hand side matrix
730  , typename MT2 // Type of the right-hand side matrix
731  , bool SO2 > // Storage order of the right-hand side matrix
732 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
733  smpAddAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
734 {
736 
739 
740  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
741  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
742 
744  {
745  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
746  addAssign( ~lhs, ~rhs );
747  }
748  else {
749  smpAddAssign_backend( ~lhs, ~rhs );
750  }
751  }
752 }
754 //*************************************************************************************************
755 
756 
757 
758 
759 //=================================================================================================
760 //
761 // SUBTRACTION ASSIGNMENT
762 //
763 //=================================================================================================
764 
765 //*************************************************************************************************
782 template< typename MT1 // Type of the left-hand side dense matrix
783  , bool SO // Storage order of the left-hand side dense matrix
784  , typename MT2 > // Type of the right-hand side dense matrix
785 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
786 {
788 
789  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
790 
791  typedef typename MT1::ElementType ET1;
792  typedef typename MT2::ElementType ET2;
793  typedef IntrinsicTrait<typename MT1::ElementType> IT;
794  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
795  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
796 
797  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
798  const bool lhsAligned ( (~lhs).isAligned() );
799  const bool rhsAligned ( (~rhs).isAligned() );
800 
801  const size_t threads ( TheThreadBackend::size() );
802  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
803  const size_t equalShare ( (~lhs).rows() / threads + addon );
804  const size_t rest ( equalShare & ( IT::size - 1UL ) );
805  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
806 
807  for( size_t i=0UL; i<threads; ++i )
808  {
809  const size_t row( i*rowsPerThread );
810 
811  if( row >= (~lhs).rows() )
812  continue;
813 
814  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
815 
816  if( vectorizable && lhsAligned && rhsAligned ) {
817  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
818  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
819  }
820  else if( vectorizable && lhsAligned ) {
821  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
822  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
823  }
824  else if( vectorizable && rhsAligned ) {
825  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
826  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
827  }
828  else {
829  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
830  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
831  }
832  }
833 
834  TheThreadBackend::wait();
835 }
837 //*************************************************************************************************
838 
839 
840 //*************************************************************************************************
857 template< typename MT1 // Type of the left-hand side dense matrix
858  , bool SO // Storage order of the left-hand side dense matrix
859  , typename MT2 > // Type of the right-hand side dense matrix
860 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
861 {
863 
864  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
865 
866  typedef typename MT1::ElementType ET1;
867  typedef typename MT2::ElementType ET2;
868  typedef IntrinsicTrait<typename MT1::ElementType> IT;
869  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
870  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
871 
872  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
873  const bool lhsAligned ( (~lhs).isAligned() );
874  const bool rhsAligned ( (~rhs).isAligned() );
875 
876  const size_t threads ( TheThreadBackend::size() );
877  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
878  const size_t equalShare ( (~lhs).columns() / threads + addon );
879  const size_t rest ( equalShare & ( IT::size - 1UL ) );
880  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
881 
882  for( size_t i=0UL; i<threads; ++i )
883  {
884  const size_t column( i*colsPerThread );
885 
886  if( column >= (~lhs).columns() )
887  continue;
888 
889  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
890 
891  if( vectorizable && lhsAligned && rhsAligned ) {
892  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
893  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
894  }
895  else if( vectorizable && lhsAligned ) {
896  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
897  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
898  }
899  else if( vectorizable && rhsAligned ) {
900  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
901  TheThreadBackend::scheduleSubAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
902  }
903  else {
904  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
905  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
906  }
907  }
908 
909  TheThreadBackend::wait();
910 }
912 //*************************************************************************************************
913 
914 
915 //*************************************************************************************************
932 template< typename MT1 // Type of the left-hand side dense matrix
933  , bool SO // Storage order of the left-hand side dense matrix
934  , typename MT2 > // Type of the right-hand side sparse matrix
935 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
936 {
938 
939  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
940 
941  typedef typename MT1::ElementType ET1;
942  typedef typename MT2::ElementType ET2;
943  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
944 
945  const size_t threads ( TheThreadBackend::size() );
946  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
947  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
948 
949  for( size_t i=0UL; i<threads; ++i )
950  {
951  const size_t row( i*rowsPerThread );
952 
953  if( row >= (~lhs).rows() )
954  continue;
955 
956  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
957  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
958  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
959  }
960 
961  TheThreadBackend::wait();
962 }
964 //*************************************************************************************************
965 
966 
967 //*************************************************************************************************
984 template< typename MT1 // Type of the left-hand side dense matrix
985  , bool SO // Storage order of the left-hand side dense matrix
986  , typename MT2 > // Type of the right-hand side sparse matrix
987 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
988 {
990 
991  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
992 
993  typedef typename MT1::ElementType ET1;
994  typedef typename MT2::ElementType ET2;
995  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
996 
997  const size_t threads ( TheThreadBackend::size() );
998  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
999  const size_t colsPerThread( (~lhs).columns() / threads + addon );
1000 
1001  for( size_t i=0UL; i<threads; ++i )
1002  {
1003  const size_t column( i*colsPerThread );
1004 
1005  if( column >= (~lhs).columns() )
1006  continue;
1007 
1008  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
1009  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
1010  TheThreadBackend::scheduleSubAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
1011  }
1012 
1013  TheThreadBackend::wait();
1014 }
1016 //*************************************************************************************************
1017 
1018 
1019 //*************************************************************************************************
1038 template< typename MT1 // Type of the left-hand side dense matrix
1039  , bool SO1 // Storage order of the left-hand side matrix
1040  , typename MT2 // Type of the right-hand side matrix
1041  , bool SO2 > // Storage order of the right-hand side matrix
1042 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
1043  smpSubAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1044 {
1046 
1047  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1048  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1049 
1050  subAssign( ~lhs, ~rhs );
1051 }
1053 //*************************************************************************************************
1054 
1055 
1056 //*************************************************************************************************
1075 template< typename MT1 // Type of the left-hand side dense matrix
1076  , bool SO1 // Storage order of the left-hand side matrix
1077  , typename MT2 // Type of the right-hand side matrix
1078  , bool SO2 > // Storage order of the right-hand side matrix
1079 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
1080  smpSubAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1081 {
1083 
1086 
1087  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1088  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1089 
1091  {
1092  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1093  subAssign( ~lhs, ~rhs );
1094  }
1095  else {
1096  smpSubAssign_backend( ~lhs, ~rhs );
1097  }
1098  }
1099 }
1101 //*************************************************************************************************
1102 
1103 
1104 
1105 
1106 //=================================================================================================
1107 //
1108 // MULTIPLICATION ASSIGNMENT
1109 //
1110 //=================================================================================================
1111 
1112 //*************************************************************************************************
1129 template< typename MT1 // Type of the left-hand side dense matrix
1130  , bool SO1 // Storage order of the left-hand side matrix
1131  , typename MT2 // Type of the right-hand side matrix
1132  , bool SO2 > // Storage order of the right-hand side matrix
1133 inline void smpMultAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1134 {
1136 
1137  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1138  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1139 
1140  multAssign( ~lhs, ~rhs );
1141 }
1143 //*************************************************************************************************
1144 
1145 
1146 
1147 
1148 //=================================================================================================
1149 //
1150 // COMPILE TIME CONSTRAINT
1151 //
1152 //=================================================================================================
1153 
1154 //*************************************************************************************************
1156 namespace {
1157 
1159 
1160 }
1162 //*************************************************************************************************
1163 
1164 } // namespace blaze
1165 
1166 #endif
Header file for mathematical functions.
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
Header file for the IsSame and IsStrictlySame type traits.
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename ColumnExprTrait< MT >::Type >::Type column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:103
Header file for the And class template.
Header file for the intrinsic trait.
Header file for the SparseMatrix base class.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:118
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Header file for the DisableIf class template.
Header file for the complete DenseSubmatrix implementation.
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:245
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename RowExprTrait< MT >::Type >::Type row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:103
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:212
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
Header file for the complete SparseSubmatrix implementation.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:212
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:170
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:154
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.