DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
48 #include <blaze/math/Functions.h>
57 #include <blaze/system/SMP.h>
58 #include <blaze/util/Assert.h>
59 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/mpl/And.h>
62 #include <blaze/util/mpl/Not.h>
63 #include <blaze/util/mpl/Or.h>
65 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // PLAIN ASSIGNMENT
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
93 template< typename MT1 // Type of the left-hand side dense matrix
94  , bool SO // Storage order of the left-hand side dense matrix
95  , typename MT2 > // Type of the right-hand side dense matrix
96 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
97 {
99 
100  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
101 
102  typedef typename MT1::ElementType ET1;
103  typedef typename MT2::ElementType ET2;
104  typedef IntrinsicTrait<typename MT1::ElementType> IT;
105  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
106  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
107 
108  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
109  const bool lhsAligned ( (~lhs).isAligned() );
110  const bool rhsAligned ( (~rhs).isAligned() );
111 
112  const int threads ( omp_get_num_threads() );
113  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
114  const size_t equalShare ( (~lhs).rows() / threads + addon );
115  const size_t rest ( equalShare & ( IT::size - 1UL ) );
116  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
117 
118 #pragma omp for schedule(dynamic,1) nowait
119  for( int i=0UL; i<threads; ++i )
120  {
121  const size_t row( i*rowsPerThread );
122 
123  if( row >= (~lhs).rows() )
124  continue;
125 
126  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
127 
128  if( vectorizable && lhsAligned && rhsAligned ) {
129  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
130  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
131  }
132  else if( vectorizable && lhsAligned ) {
133  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
134  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
135  }
136  else if( vectorizable && rhsAligned ) {
137  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
138  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
139  }
140  else {
141  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
142  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
143  }
144  }
145 }
147 //*************************************************************************************************
148 
149 
150 //*************************************************************************************************
166 template< typename MT1 // Type of the left-hand side dense matrix
167  , bool SO // Storage order of the left-hand side dense matrix
168  , typename MT2 > // Type of the right-hand side dense matrix
169 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
170 {
172 
173  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
174 
175  typedef typename MT1::ElementType ET1;
176  typedef typename MT2::ElementType ET2;
177  typedef IntrinsicTrait<typename MT1::ElementType> IT;
178  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
179  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
180 
181  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
182  const bool lhsAligned ( (~lhs).isAligned() );
183  const bool rhsAligned ( (~rhs).isAligned() );
184 
185  const int threads ( omp_get_num_threads() );
186  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
187  const size_t equalShare ( (~lhs).columns() / threads + addon );
188  const size_t rest ( equalShare & ( IT::size - 1UL ) );
189  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
190 
191 #pragma omp for schedule(dynamic,1) nowait
192  for( int i=0UL; i<threads; ++i )
193  {
194  const size_t column( i*colsPerThread );
195 
196  if( column >= (~lhs).columns() )
197  continue;
198 
199  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
200 
201  if( vectorizable && lhsAligned && rhsAligned ) {
202  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
203  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
204  }
205  else if( vectorizable && lhsAligned ) {
206  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
207  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
208  }
209  else if( vectorizable && rhsAligned ) {
210  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
211  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
212  }
213  else {
214  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
215  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
216  }
217  }
218 }
220 //*************************************************************************************************
221 
222 
223 //*************************************************************************************************
239 template< typename MT1 // Type of the left-hand side dense matrix
240  , bool SO // Storage order of the left-hand side dense matrix
241  , typename MT2 > // Type of the right-hand side sparse matrix
242 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
243 {
245 
246  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
247 
248  typedef typename MT1::ElementType ET1;
249  typedef typename MT2::ElementType ET2;
250  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
251 
252  const int threads ( omp_get_num_threads() );
253  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
254  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
255 
256 #pragma omp for schedule(dynamic,1) nowait
257  for( int i=0UL; i<threads; ++i )
258  {
259  const size_t row( i*rowsPerThread );
260 
261  if( row >= (~lhs).rows() )
262  continue;
263 
264  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
265  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
266  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
267  }
268 }
270 //*************************************************************************************************
271 
272 
273 //*************************************************************************************************
289 template< typename MT1 // Type of the left-hand side dense matrix
290  , bool SO // Storage order of the left-hand side dense matrix
291  , typename MT2 > // Type of the right-hand side sparse matrix
292 void smpAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
293 {
295 
296  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
297 
298  typedef typename MT1::ElementType ET1;
299  typedef typename MT2::ElementType ET2;
300  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
301 
302  const int threads ( omp_get_num_threads() );
303  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
304  const size_t colsPerThread( (~lhs).columns() / threads + addon );
305 
306 #pragma omp for schedule(dynamic,1) nowait
307  for( int i=0UL; i<threads; ++i )
308  {
309  const size_t column( i*colsPerThread );
310 
311  if( column >= (~lhs).columns() )
312  continue;
313 
314  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
315  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
316  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
317  }
318 }
320 //*************************************************************************************************
321 
322 
323 //*************************************************************************************************
341 template< typename MT1 // Type of the left-hand side dense matrix
342  , bool SO1 // Storage order of the left-hand side dense matrix
343  , typename MT2 // Type of the right-hand side matrix
344  , bool SO2 > // Storage order of the right-hand side matrix
345 inline typename EnableIf< And< IsDenseMatrix<MT1>
346  , Or< Not< IsSMPAssignable<MT1> >
347  , Not< IsSMPAssignable<MT2> > > > >::Type
348  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
349 {
351 
352  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
353  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
354 
355  assign( ~lhs, ~rhs );
356 }
358 //*************************************************************************************************
359 
360 
361 //*************************************************************************************************
379 template< typename MT1 // Type of the left-hand side dense matrix
380  , bool SO1 // Storage order of the left-hand side dense matrix
381  , typename MT2 // Type of the right-hand side matrix
382  , bool SO2 > // Storage order of the right-hand side matrix
383 inline typename EnableIf< And< IsDenseMatrix<MT1>
384  , IsSMPAssignable<MT1>
385  , IsSMPAssignable<MT2> > >::Type
386  smpAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
387 {
389 
392 
393  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
394  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
395 
397  {
398  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
399  assign( ~lhs, ~rhs );
400  }
401  else {
402 #pragma omp parallel shared( lhs, rhs )
403  smpAssign_backend( ~lhs, ~rhs );
404  }
405  }
406 }
408 //*************************************************************************************************
409 
410 
411 
412 
413 //=================================================================================================
414 //
415 // ADDITION ASSIGNMENT
416 //
417 //=================================================================================================
418 
419 //*************************************************************************************************
436 template< typename MT1 // Type of the left-hand side dense matrix
437  , bool SO // Storage order of the left-hand side dense matrix
438  , typename MT2 > // Type of the right-hand side dense matrix
439 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
440 {
442 
443  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
444 
445  typedef typename MT1::ElementType ET1;
446  typedef typename MT2::ElementType ET2;
447  typedef IntrinsicTrait<typename MT1::ElementType> IT;
448  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
449  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
450 
451  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
452  const bool lhsAligned ( (~lhs).isAligned() );
453  const bool rhsAligned ( (~rhs).isAligned() );
454 
455  const int threads ( omp_get_num_threads() );
456  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
457  const size_t equalShare ( (~lhs).rows() / threads + addon );
458  const size_t rest ( equalShare & ( IT::size - 1UL ) );
459  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
460 
461 #pragma omp for schedule(dynamic,1) nowait
462  for( int i=0UL; i<threads; ++i )
463  {
464  const size_t row( i*rowsPerThread );
465 
466  if( row >= (~lhs).rows() )
467  continue;
468 
469  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
470 
471  if( vectorizable && lhsAligned && rhsAligned ) {
472  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
473  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
474  }
475  else if( vectorizable && lhsAligned ) {
476  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
477  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
478  }
479  else if( vectorizable && rhsAligned ) {
480  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
481  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
482  }
483  else {
484  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
485  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
486  }
487  }
488 }
490 //*************************************************************************************************
491 
492 
493 //*************************************************************************************************
510 template< typename MT1 // Type of the left-hand side dense matrix
511  , bool SO // Storage order of the left-hand side dense matrix
512  , typename MT2 > // Type of the right-hand side dense matrix
513 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
514 {
516 
517  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
518 
519  typedef typename MT1::ElementType ET1;
520  typedef typename MT2::ElementType ET2;
521  typedef IntrinsicTrait<typename MT1::ElementType> IT;
522  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
523  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
524 
525  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
526  const bool lhsAligned ( (~lhs).isAligned() );
527  const bool rhsAligned ( (~rhs).isAligned() );
528 
529  const int threads ( omp_get_num_threads() );
530  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
531  const size_t equalShare ( (~lhs).columns() / threads + addon );
532  const size_t rest ( equalShare & ( IT::size - 1UL ) );
533  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
534 
535 #pragma omp for schedule(dynamic,1) nowait
536  for( int i=0UL; i<threads; ++i )
537  {
538  const size_t column( i*colsPerThread );
539 
540  if( column >= (~lhs).columns() )
541  continue;
542 
543  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
544 
545  if( vectorizable && lhsAligned && rhsAligned ) {
546  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
547  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
548  }
549  else if( vectorizable && lhsAligned ) {
550  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
551  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
552  }
553  else if( vectorizable && rhsAligned ) {
554  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
555  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
556  }
557  else {
558  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
559  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
560  }
561  }
562 }
564 //*************************************************************************************************
565 
566 
567 //*************************************************************************************************
584 template< typename MT1 // Type of the left-hand side dense matrix
585  , bool SO // Storage order of the left-hand side dense matrix
586  , typename MT2 > // Type of the right-hand side sparse matrix
587 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
588 {
590 
591  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
592 
593  typedef typename MT1::ElementType ET1;
594  typedef typename MT2::ElementType ET2;
595  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
596 
597  const int threads ( omp_get_num_threads() );
598  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
599  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
600 
601 #pragma omp for schedule(dynamic,1) nowait
602  for( int i=0UL; i<threads; ++i )
603  {
604  const size_t row( i*rowsPerThread );
605 
606  if( row >= (~lhs).rows() )
607  continue;
608 
609  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
610  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
611  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
612  }
613 }
615 //*************************************************************************************************
616 
617 
618 //*************************************************************************************************
635 template< typename MT1 // Type of the left-hand side dense matrix
636  , bool SO // Storage order of the left-hand side dense matrix
637  , typename MT2 > // Type of the right-hand side sparse matrix
638 void smpAddAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
639 {
641 
642  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
643 
644  typedef typename MT1::ElementType ET1;
645  typedef typename MT2::ElementType ET2;
646  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
647 
648  const int threads ( omp_get_num_threads() );
649  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
650  const size_t colsPerThread( (~lhs).columns() / threads + addon );
651 
652 #pragma omp for schedule(dynamic,1) nowait
653  for( int i=0UL; i<threads; ++i )
654  {
655  const size_t column( i*colsPerThread );
656 
657  if( column >= (~lhs).columns() )
658  continue;
659 
660  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
661  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
662  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
663  }
664 }
666 //*************************************************************************************************
667 
668 
669 //*************************************************************************************************
687 template< typename MT1 // Type of the left-hand side dense matrix
688  , bool SO1 // Storage order of the left-hand side dense matrix
689  , typename MT2 // Type of the right-hand side matrix
690  , bool SO2 > // Storage order of the right-hand side matrix
691 inline typename EnableIf< And< IsDenseMatrix<MT1>
692  , Or< Not< IsSMPAssignable<MT1> >
693  , Not< IsSMPAssignable<MT2> > > > >::Type
694  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
695 {
697 
698  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
699  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
700 
701  addAssign( ~lhs, ~rhs );
702 }
704 //*************************************************************************************************
705 
706 
707 //*************************************************************************************************
725 template< typename MT1 // Type of the left-hand side dense matrix
726  , bool SO1 // Storage order of the left-hand side dense matrix
727  , typename MT2 // Type of the right-hand side matrix
728  , bool SO2 > // Storage order of the right-hand side matrix
729 inline typename EnableIf< And< IsDenseMatrix<MT1>
730  , IsSMPAssignable<MT1>
731  , IsSMPAssignable<MT2> > >::Type
732  smpAddAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
733 {
735 
738 
739  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
740  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
741 
743  {
744  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
745  addAssign( ~lhs, ~rhs );
746  }
747  else {
748 #pragma omp parallel shared( lhs, rhs )
749  smpAddAssign_backend( ~lhs, ~rhs );
750  }
751  }
752 }
754 //*************************************************************************************************
755 
756 
757 
758 
759 //=================================================================================================
760 //
761 // SUBTRACTION ASSIGNMENT
762 //
763 //=================================================================================================
764 
765 //*************************************************************************************************
782 template< typename MT1 // Type of the left-hand side dense matrix
783  , bool SO // Storage order of the left-hand side dense matrix
784  , typename MT2 > // Type of the right-hand side dense matrix
785 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
786 {
788 
789  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
790 
791  typedef typename MT1::ElementType ET1;
792  typedef typename MT2::ElementType ET2;
793  typedef IntrinsicTrait<typename MT1::ElementType> IT;
794  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
795  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
796 
797  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
798  const bool lhsAligned ( (~lhs).isAligned() );
799  const bool rhsAligned ( (~rhs).isAligned() );
800 
801  const int threads ( omp_get_num_threads() );
802  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
803  const size_t equalShare ( (~lhs).rows() / threads + addon );
804  const size_t rest ( equalShare & ( IT::size - 1UL ) );
805  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
806 
807 #pragma omp for schedule(dynamic,1) nowait
808  for( int i=0UL; i<threads; ++i )
809  {
810  const size_t row( i*rowsPerThread );
811 
812  if( row >= (~lhs).rows() )
813  continue;
814 
815  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
816 
817  if( vectorizable && lhsAligned && rhsAligned ) {
818  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
819  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
820  }
821  else if( vectorizable && lhsAligned ) {
822  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
823  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
824  }
825  else if( vectorizable && rhsAligned ) {
826  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
827  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
828  }
829  else {
830  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
831  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
832  }
833  }
834 }
836 //*************************************************************************************************
837 
838 
839 //*************************************************************************************************
856 template< typename MT1 // Type of the left-hand side dense matrix
857  , bool SO // Storage order of the left-hand side dense matrix
858  , typename MT2 > // Type of the right-hand side dense matrix
859 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
860 {
862 
863  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
864 
865  typedef typename MT1::ElementType ET1;
866  typedef typename MT2::ElementType ET2;
867  typedef IntrinsicTrait<typename MT1::ElementType> IT;
868  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
869  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
870 
871  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
872  const bool lhsAligned ( (~lhs).isAligned() );
873  const bool rhsAligned ( (~rhs).isAligned() );
874 
875  const int threads ( omp_get_num_threads() );
876  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
877  const size_t equalShare ( (~lhs).columns() / threads + addon );
878  const size_t rest ( equalShare & ( IT::size - 1UL ) );
879  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
880 
881 #pragma omp for schedule(dynamic,1) nowait
882  for( int i=0UL; i<threads; ++i )
883  {
884  const size_t column( i*colsPerThread );
885 
886  if( column >= (~lhs).columns() )
887  continue;
888 
889  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
890 
891  if( vectorizable && lhsAligned && rhsAligned ) {
892  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
893  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
894  }
895  else if( vectorizable && lhsAligned ) {
896  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
897  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
898  }
899  else if( vectorizable && rhsAligned ) {
900  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
901  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
902  }
903  else {
904  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
905  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
906  }
907  }
908 }
910 //*************************************************************************************************
911 
912 
913 //*************************************************************************************************
930 template< typename MT1 // Type of the left-hand side dense matrix
931  , bool SO // Storage order of the left-hand side dense matrix
932  , typename MT2 > // Type of the right-hand side sparse matrix
933 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
934 {
936 
937  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
938 
939  typedef typename MT1::ElementType ET1;
940  typedef typename MT2::ElementType ET2;
941  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
942 
943  const int threads ( omp_get_num_threads() );
944  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
945  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
946 
947 #pragma omp for schedule(dynamic,1) nowait
948  for( int i=0UL; i<threads; ++i )
949  {
950  const size_t row( i*rowsPerThread );
951 
952  if( row >= (~lhs).rows() )
953  continue;
954 
955  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
956  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
957  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
958  }
959 }
961 //*************************************************************************************************
962 
963 
964 //*************************************************************************************************
981 template< typename MT1 // Type of the left-hand side dense matrix
982  , bool SO // Storage order of the left-hand side dense matrix
983  , typename MT2 > // Type of the right-hand side sparse matrix
984 void smpSubAssign_backend( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
985 {
987 
988  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
989 
990  typedef typename MT1::ElementType ET1;
991  typedef typename MT2::ElementType ET2;
992  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
993 
994  const int threads ( omp_get_num_threads() );
995  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
996  const size_t colsPerThread( (~lhs).columns() / threads + addon );
997 
998 #pragma omp for schedule(dynamic,1) nowait
999  for( int i=0UL; i<threads; ++i )
1000  {
1001  const size_t column( i*colsPerThread );
1002 
1003  if( column >= (~lhs).columns() )
1004  continue;
1005 
1006  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
1007  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
1008  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
1009  }
1010 }
1012 //*************************************************************************************************
1013 
1014 
1015 //*************************************************************************************************
1033 template< typename MT1 // Type of the left-hand side dense matrix
1034  , bool SO1 // Storage order of the left-hand side dense matrix
1035  , typename MT2 // Type of the right-hand side matrix
1036  , bool SO2 > // Storage order of the right-hand side matrix
1037 inline typename EnableIf< And< IsDenseMatrix<MT1>
1038  , Or< Not< IsSMPAssignable<MT1> >
1039  , Not< IsSMPAssignable<MT2> > > > >::Type
1040  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1041 {
1043 
1044  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1045  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1046 
1047  subAssign( ~lhs, ~rhs );
1048 }
1050 //*************************************************************************************************
1051 
1052 
1053 //*************************************************************************************************
1071 template< typename MT1 // Type of the left-hand side dense matrix
1072  , bool SO1 // Storage order of the left-hand side dense matrix
1073  , typename MT2 // Type of the right-hand side matrix
1074  , bool SO2 > // Storage order of the right-hand side matrix
1075 inline typename EnableIf< And< IsDenseMatrix<MT1>
1076  , IsSMPAssignable<MT1>
1077  , IsSMPAssignable<MT2> > >::Type
1078  smpSubAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1079 {
1081 
1084 
1085  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1086  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1087 
1089  {
1090  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1091  subAssign( ~lhs, ~rhs );
1092  }
1093  else {
1094 #pragma omp parallel shared( lhs, rhs )
1095  smpSubAssign_backend( ~lhs, ~rhs );
1096  }
1097  }
1098 }
1100 //*************************************************************************************************
1101 
1102 
1103 
1104 
1105 //=================================================================================================
1106 //
1107 // MULTIPLICATION ASSIGNMENT
1108 //
1109 //=================================================================================================
1110 
1111 //*************************************************************************************************
1127 template< typename MT1 // Type of the left-hand side dense matrix
1128  , bool SO1 // Storage order of the left-hand side matrix
1129  , typename MT2 // Type of the right-hand side matrix
1130  , bool SO2 > // Storage order of the right-hand side matrix
1131 inline typename EnableIf< IsDenseMatrix<MT1> >::Type
1132  smpMultAssign( Matrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1133 {
1135 
1136  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1137  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1138 
1139  multAssign( ~lhs, ~rhs );
1140 }
1142 //*************************************************************************************************
1143 
1144 
1145 
1146 
1147 //=================================================================================================
1148 //
1149 // COMPILE TIME CONSTRAINT
1150 //
1151 //=================================================================================================
1152 
1153 //*************************************************************************************************
// Unnamed namespace belonging to the "COMPILE TIME CONSTRAINT" section of this header.
// NOTE(review): the namespace is empty in this listing and the embedded listing numbers skip a
// line inside it; presumably a compile time assertion was dropped during extraction -- confirm
// against the upstream header before relying on this block.
1155 namespace {
1156 
1158 
1159 }
1161 //*************************************************************************************************
1162 
1163 } // namespace blaze
1164 
1165 #endif
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename ColumnExprTrait< MT >::Type >::Type column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:103
Header file for the And class template.
Header file for the intrinsic trait.
Header file for the SparseMatrix base class.
Header file for the matrix storage order types.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:118
Header file for the complete DenseSubmatrix implementation.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
Header file for the Or class template.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:245
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename RowExprTrait< MT >::Type >::Type row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:103
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:212
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
Header file for the complete SparseSubmatrix implementation.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:212
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849