All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
48 #include <blaze/math/Functions.h>
56 #include <blaze/system/SMP.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/DisableIf.h>
59 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/mpl/And.h>
64 
65 
66 namespace blaze {
67 
68 //=================================================================================================
69 //
70 // PLAIN ASSIGNMENT
71 //
72 //=================================================================================================
73 
74 //*************************************************************************************************
90 template< typename MT1 // Type of the left-hand side dense matrix
91  , bool SO // Storage order of the left-hand side dense matrix
92  , typename MT2 > // Type of the right-hand side dense matrix
93 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
94 {
96 
97  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
98 
99  typedef typename MT1::ElementType ET1;
100  typedef typename MT2::ElementType ET2;
101  typedef IntrinsicTrait<typename MT1::ElementType> IT;
102  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
103  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
104 
105  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
106  const bool lhsAligned ( (~lhs).isAligned() );
107  const bool rhsAligned ( (~rhs).isAligned() );
108 
109  const int threads ( omp_get_num_threads() );
110  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
111  const size_t equalShare ( (~lhs).rows() / threads + addon );
112  const size_t rest ( equalShare & ( IT::size - 1UL ) );
113  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
114 
115 #pragma omp for schedule(dynamic,1) nowait
116  for( int i=0UL; i<threads; ++i )
117  {
118  const size_t row( i*rowsPerThread );
119 
120  if( row >= (~lhs).rows() )
121  continue;
122 
123  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
124 
125  if( vectorizable && lhsAligned && rhsAligned ) {
126  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
127  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
128  }
129  else if( vectorizable && lhsAligned ) {
130  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
131  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
132  }
133  else if( vectorizable && rhsAligned ) {
134  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
135  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
136  }
137  else {
138  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
139  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
140  }
141  }
142 }
144 //*************************************************************************************************
145 
146 
147 //*************************************************************************************************
163 template< typename MT1 // Type of the left-hand side dense matrix
164  , bool SO // Storage order of the left-hand side dense matrix
165  , typename MT2 > // Type of the right-hand side dense matrix
166 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
167 {
169 
170  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
171 
172  typedef typename MT1::ElementType ET1;
173  typedef typename MT2::ElementType ET2;
174  typedef IntrinsicTrait<typename MT1::ElementType> IT;
175  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
176  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
177 
178  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
179  const bool lhsAligned ( (~lhs).isAligned() );
180  const bool rhsAligned ( (~rhs).isAligned() );
181 
182  const int threads ( omp_get_num_threads() );
183  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
184  const size_t equalShare ( (~lhs).columns() / threads + addon );
185  const size_t rest ( equalShare & ( IT::size - 1UL ) );
186  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
187 
188 #pragma omp for schedule(dynamic,1) nowait
189  for( int i=0UL; i<threads; ++i )
190  {
191  const size_t column( i*colsPerThread );
192 
193  if( column >= (~lhs).columns() )
194  continue;
195 
196  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
197 
198  if( vectorizable && lhsAligned && rhsAligned ) {
199  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
200  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
201  }
202  else if( vectorizable && lhsAligned ) {
203  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
204  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
205  }
206  else if( vectorizable && rhsAligned ) {
207  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
208  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
209  }
210  else {
211  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
212  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
213  }
214  }
215 }
217 //*************************************************************************************************
218 
219 
220 //*************************************************************************************************
236 template< typename MT1 // Type of the left-hand side dense matrix
237  , bool SO // Storage order of the left-hand side dense matrix
238  , typename MT2 > // Type of the right-hand side sparse matrix
239 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
240 {
242 
243  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
244 
245  typedef typename MT1::ElementType ET1;
246  typedef typename MT2::ElementType ET2;
247  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
248 
249  const int threads ( omp_get_num_threads() );
250  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
251  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
252 
253 #pragma omp for schedule(dynamic,1) nowait
254  for( int i=0UL; i<threads; ++i )
255  {
256  const size_t row( i*rowsPerThread );
257 
258  if( row >= (~lhs).rows() )
259  continue;
260 
261  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
262  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
263  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
264  }
265 }
267 //*************************************************************************************************
268 
269 
270 //*************************************************************************************************
286 template< typename MT1 // Type of the left-hand side dense matrix
287  , bool SO // Storage order of the left-hand side dense matrix
288  , typename MT2 > // Type of the right-hand side sparse matrix
289 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
290 {
292 
293  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
294 
295  typedef typename MT1::ElementType ET1;
296  typedef typename MT2::ElementType ET2;
297  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
298 
299  const int threads ( omp_get_num_threads() );
300  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
301  const size_t colsPerThread( (~lhs).columns() / threads + addon );
302 
303 #pragma omp for schedule(dynamic,1) nowait
304  for( int i=0UL; i<threads; ++i )
305  {
306  const size_t column( i*colsPerThread );
307 
308  if( column >= (~lhs).columns() )
309  continue;
310 
311  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
312  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
313  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
314  }
315 }
317 //*************************************************************************************************
318 
319 
320 //*************************************************************************************************
338 template< typename MT1 // Type of the left-hand side dense matrix
339  , bool SO1 // Storage order of the left-hand side matrix
340  , typename MT2 // Type of the right-hand side matrix
341  , bool SO2 > // Storage order of the right-hand side matrix
342 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
343  smpAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
344 {
346 
347  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
348  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
349 
350  assign( ~lhs, ~rhs );
351 }
353 //*************************************************************************************************
354 
355 
356 //*************************************************************************************************
374 template< typename MT1 // Type of the left-hand side dense matrix
375  , bool SO1 // Storage order of the left-hand side matrix
376  , typename MT2 // Type of the right-hand side matrix
377  , bool SO2 > // Storage order of the right-hand side matrix
378 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
379  smpAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
380 {
382 
385 
386  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
387  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
388 
390  {
391  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
392  assign( ~lhs, ~rhs );
393  }
394  else {
395 #pragma omp parallel shared( lhs, rhs )
396  smpAssign_backend( ~lhs, ~rhs );
397  }
398  }
399 }
401 //*************************************************************************************************
402 
403 
404 
405 
406 //=================================================================================================
407 //
408 // ADDITION ASSIGNMENT
409 //
410 //=================================================================================================
411 
412 //*************************************************************************************************
429 template< typename MT1 // Type of the left-hand side dense matrix
430  , bool SO // Storage order of the left-hand side dense matrix
431  , typename MT2 > // Type of the right-hand side dense matrix
432 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
433 {
435 
436  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
437 
438  typedef typename MT1::ElementType ET1;
439  typedef typename MT2::ElementType ET2;
440  typedef IntrinsicTrait<typename MT1::ElementType> IT;
441  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
442  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
443 
444  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
445  const bool lhsAligned ( (~lhs).isAligned() );
446  const bool rhsAligned ( (~rhs).isAligned() );
447 
448  const int threads ( omp_get_num_threads() );
449  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
450  const size_t equalShare ( (~lhs).rows() / threads + addon );
451  const size_t rest ( equalShare & ( IT::size - 1UL ) );
452  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
453 
454 #pragma omp for schedule(dynamic,1) nowait
455  for( int i=0UL; i<threads; ++i )
456  {
457  const size_t row( i*rowsPerThread );
458 
459  if( row >= (~lhs).rows() )
460  continue;
461 
462  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
463 
464  if( vectorizable && lhsAligned && rhsAligned ) {
465  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
466  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
467  }
468  else if( vectorizable && lhsAligned ) {
469  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
470  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
471  }
472  else if( vectorizable && rhsAligned ) {
473  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
474  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
475  }
476  else {
477  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
478  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
479  }
480  }
481 }
483 //*************************************************************************************************
484 
485 
486 //*************************************************************************************************
503 template< typename MT1 // Type of the left-hand side dense matrix
504  , bool SO // Storage order of the left-hand side dense matrix
505  , typename MT2 > // Type of the right-hand side dense matrix
506 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
507 {
509 
510  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
511 
512  typedef typename MT1::ElementType ET1;
513  typedef typename MT2::ElementType ET2;
514  typedef IntrinsicTrait<typename MT1::ElementType> IT;
515  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
516  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
517 
518  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
519  const bool lhsAligned ( (~lhs).isAligned() );
520  const bool rhsAligned ( (~rhs).isAligned() );
521 
522  const int threads ( omp_get_num_threads() );
523  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
524  const size_t equalShare ( (~lhs).columns() / threads + addon );
525  const size_t rest ( equalShare & ( IT::size - 1UL ) );
526  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
527 
528 #pragma omp for schedule(dynamic,1) nowait
529  for( int i=0UL; i<threads; ++i )
530  {
531  const size_t column( i*colsPerThread );
532 
533  if( column >= (~lhs).columns() )
534  continue;
535 
536  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
537 
538  if( vectorizable && lhsAligned && rhsAligned ) {
539  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
540  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
541  }
542  else if( vectorizable && lhsAligned ) {
543  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
544  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
545  }
546  else if( vectorizable && rhsAligned ) {
547  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
548  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
549  }
550  else {
551  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
552  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
553  }
554  }
555 }
557 //*************************************************************************************************
558 
559 
560 //*************************************************************************************************
577 template< typename MT1 // Type of the left-hand side dense matrix
578  , bool SO // Storage order of the left-hand side dense matrix
579  , typename MT2 > // Type of the right-hand side sparse matrix
580 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
581 {
583 
584  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
585 
586  typedef typename MT1::ElementType ET1;
587  typedef typename MT2::ElementType ET2;
588  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
589 
590  const int threads ( omp_get_num_threads() );
591  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
592  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
593 
594 #pragma omp for schedule(dynamic,1) nowait
595  for( int i=0UL; i<threads; ++i )
596  {
597  const size_t row( i*rowsPerThread );
598 
599  if( row >= (~lhs).rows() )
600  continue;
601 
602  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
603  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
604  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
605  }
606 }
608 //*************************************************************************************************
609 
610 
611 //*************************************************************************************************
628 template< typename MT1 // Type of the left-hand side dense matrix
629  , bool SO // Storage order of the left-hand side dense matrix
630  , typename MT2 > // Type of the right-hand side sparse matrix
631 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
632 {
634 
635  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
636 
637  typedef typename MT1::ElementType ET1;
638  typedef typename MT2::ElementType ET2;
639  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
640 
641  const int threads ( omp_get_num_threads() );
642  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
643  const size_t colsPerThread( (~lhs).columns() / threads + addon );
644 
645 #pragma omp for schedule(dynamic,1) nowait
646  for( int i=0UL; i<threads; ++i )
647  {
648  const size_t column( i*colsPerThread );
649 
650  if( column >= (~lhs).columns() )
651  continue;
652 
653  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
654  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
655  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
656  }
657 }
659 //*************************************************************************************************
660 
661 
662 //*************************************************************************************************
680 template< typename MT1 // Type of the left-hand side dense matrix
681  , bool SO1 // Storage order of the left-hand side matrix
682  , typename MT2 // Type of the right-hand side matrix
683  , bool SO2 > // Storage order of the right-hand side matrix
684 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
685  smpAddAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
686 {
688 
689  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
690  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
691 
692  addAssign( ~lhs, ~rhs );
693 }
695 //*************************************************************************************************
696 
697 
698 //*************************************************************************************************
716 template< typename MT1 // Type of the left-hand side dense matrix
717  , bool SO1 // Storage order of the left-hand side matrix
718  , typename MT2 // Type of the right-hand side matrix
719  , bool SO2 > // Storage order of the right-hand side matrix
720 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
721  smpAddAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
722 {
724 
727 
728  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
729  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
730 
732  {
733  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
734  addAssign( ~lhs, ~rhs );
735  }
736  else {
737 #pragma omp parallel shared( lhs, rhs )
738  smpAddAssign_backend( ~lhs, ~rhs );
739  }
740  }
741 }
743 //*************************************************************************************************
744 
745 
746 
747 
748 //=================================================================================================
749 //
750 // SUBTRACTION ASSIGNMENT
751 //
752 //=================================================================================================
753 
754 //*************************************************************************************************
771 template< typename MT1 // Type of the left-hand side dense matrix
772  , bool SO // Storage order of the left-hand side dense matrix
773  , typename MT2 > // Type of the right-hand side dense matrix
774 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
775 {
777 
778  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
779 
780  typedef typename MT1::ElementType ET1;
781  typedef typename MT2::ElementType ET2;
782  typedef IntrinsicTrait<typename MT1::ElementType> IT;
783  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
784  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
785 
786  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
787  const bool lhsAligned ( (~lhs).isAligned() );
788  const bool rhsAligned ( (~rhs).isAligned() );
789 
790  const int threads ( omp_get_num_threads() );
791  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
792  const size_t equalShare ( (~lhs).rows() / threads + addon );
793  const size_t rest ( equalShare & ( IT::size - 1UL ) );
794  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
795 
796 #pragma omp for schedule(dynamic,1) nowait
797  for( int i=0UL; i<threads; ++i )
798  {
799  const size_t row( i*rowsPerThread );
800 
801  if( row >= (~lhs).rows() )
802  continue;
803 
804  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
805 
806  if( vectorizable && lhsAligned && rhsAligned ) {
807  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
808  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
809  }
810  else if( vectorizable && lhsAligned ) {
811  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
812  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
813  }
814  else if( vectorizable && rhsAligned ) {
815  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
816  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
817  }
818  else {
819  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
820  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
821  }
822  }
823 }
825 //*************************************************************************************************
826 
827 
828 //*************************************************************************************************
845 template< typename MT1 // Type of the left-hand side dense matrix
846  , bool SO // Storage order of the left-hand side dense matrix
847  , typename MT2 > // Type of the right-hand side dense matrix
848 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
849 {
851 
852  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
853 
854  typedef typename MT1::ElementType ET1;
855  typedef typename MT2::ElementType ET2;
856  typedef IntrinsicTrait<typename MT1::ElementType> IT;
857  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
858  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
859 
860  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
861  const bool lhsAligned ( (~lhs).isAligned() );
862  const bool rhsAligned ( (~rhs).isAligned() );
863 
864  const int threads ( omp_get_num_threads() );
865  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
866  const size_t equalShare ( (~lhs).columns() / threads + addon );
867  const size_t rest ( equalShare & ( IT::size - 1UL ) );
868  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
869 
870 #pragma omp for schedule(dynamic,1) nowait
871  for( int i=0UL; i<threads; ++i )
872  {
873  const size_t column( i*colsPerThread );
874 
875  if( column >= (~lhs).columns() )
876  continue;
877 
878  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
879 
880  if( vectorizable && lhsAligned && rhsAligned ) {
881  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
882  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
883  }
884  else if( vectorizable && lhsAligned ) {
885  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
886  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
887  }
888  else if( vectorizable && rhsAligned ) {
889  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
890  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
891  }
892  else {
893  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
894  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
895  }
896  }
897 }
899 //*************************************************************************************************
900 
901 
902 //*************************************************************************************************
919 template< typename MT1 // Type of the left-hand side dense matrix
920  , bool SO // Storage order of the left-hand side dense matrix
921  , typename MT2 > // Type of the right-hand side sparse matrix
922 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
923 {
925 
926  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
927 
928  typedef typename MT1::ElementType ET1;
929  typedef typename MT2::ElementType ET2;
930  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
931 
932  const int threads ( omp_get_num_threads() );
933  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
934  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
935 
936 #pragma omp for schedule(dynamic,1) nowait
937  for( int i=0UL; i<threads; ++i )
938  {
939  const size_t row( i*rowsPerThread );
940 
941  if( row >= (~lhs).rows() )
942  continue;
943 
944  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
945  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
946  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
947  }
948 }
950 //*************************************************************************************************
951 
952 
953 //*************************************************************************************************
970 template< typename MT1 // Type of the left-hand side dense matrix
971  , bool SO // Storage order of the left-hand side dense matrix
972  , typename MT2 > // Type of the right-hand side sparse matrix
973 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
974 {
976 
977  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
978 
979  typedef typename MT1::ElementType ET1;
980  typedef typename MT2::ElementType ET2;
981  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
982 
983  const int threads ( omp_get_num_threads() );
984  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
985  const size_t colsPerThread( (~lhs).columns() / threads + addon );
986 
987 #pragma omp for schedule(dynamic,1) nowait
988  for( int i=0UL; i<threads; ++i )
989  {
990  const size_t column( i*colsPerThread );
991 
992  if( column >= (~lhs).columns() )
993  continue;
994 
995  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
996  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
997  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
998  }
999 }
1001 //*************************************************************************************************
1002 
1003 
1004 //*************************************************************************************************
1022 template< typename MT1 // Type of the left-hand side dense matrix
1023  , bool SO1 // Storage order of the left-hand side matrix
1024  , typename MT2 // Type of the right-hand side matrix
1025  , bool SO2 > // Storage order of the right-hand side matrix
1026 inline typename DisableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
1027  smpSubAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1028 {
1030 
1031  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1032  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1033 
1034  subAssign( ~lhs, ~rhs );
1035 }
1037 //*************************************************************************************************
1038 
1039 
1040 //*************************************************************************************************
1058 template< typename MT1 // Type of the left-hand side dense matrix
1059  , bool SO1 // Storage order of the left-hand side matrix
1060  , typename MT2 // Type of the right-hand side matrix
1061  , bool SO2 > // Storage order of the right-hand side matrix
1062 inline typename EnableIf< And< IsSMPAssignable<MT1>, IsSMPAssignable<MT2> > >::Type
1063  smpSubAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1064 {
1066 
1069 
1070  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1071  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1072 
1074  {
1075  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1076  subAssign( ~lhs, ~rhs );
1077  }
1078  else {
1079 #pragma omp parallel shared( lhs, rhs )
1080  smpSubAssign_backend( ~lhs, ~rhs );
1081  }
1082  }
1083 }
1085 //*************************************************************************************************
1086 
1087 
1088 
1089 
1090 //=================================================================================================
1091 //
1092 // MULTIPLICATION ASSIGNMENT
1093 //
1094 //=================================================================================================
1095 
1096 //*************************************************************************************************
1112 template< typename MT1 // Type of the left-hand side dense matrix
1113  , bool SO1 // Storage order of the left-hand side matrix
1114  , typename MT2 // Type of the right-hand side matrix
1115  , bool SO2 > // Storage order of the right-hand side matrix
1116 inline void smpMultAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1117 {
1119 
1120  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1121  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1122 
1123  multAssign( ~lhs, ~rhs );
1124 }
1126 //*************************************************************************************************
1127 
1128 
1129 
1130 
1131 //=================================================================================================
1132 //
1133 // COMPILE TIME CONSTRAINT
1134 //
1135 //=================================================================================================
1136 
1137 //*************************************************************************************************
// NOTE(review): this unnamed namespace is empty in the listing. The embedded numbering skips
// 1141, so its content may not be shown here -- confirm against the original header.
1139 namespace {
1140 
1142 
1143 }
1145 //*************************************************************************************************
1146 
1147 } // namespace blaze
1148 
1149 #endif
Header file for mathematical functions.
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
Header file for the IsSame and IsStrictlySame type traits.
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename ColumnExprTrait< MT >::Type >::Type column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:103
Header file for the And class template.
Header file for the intrinsic trait.
Header file for the SparseMatrix base class.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:118
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Header file for the DisableIf class template.
Header file for the complete DenseSubmatrix implementation.
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Header file for the serial section implementation.
Header file for the parallel section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:245
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename RowExprTrait< MT >::Type >::Type row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:103
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:212
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
Header file for the complete SparseSubmatrix implementation.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:212
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:170
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:154
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.