All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DenseMatrix.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEMATRIX_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
47 #include <blaze/math/Functions.h>
53 #include <blaze/system/OpenMP.h>
54 #include <blaze/util/Assert.h>
55 #include <blaze/util/EnableIf.h>
59 
60 
61 namespace blaze {
62 
63 //=================================================================================================
64 //
65 // GLOBAL FUNCTIONS
66 //
67 //=================================================================================================
68 
69 //*************************************************************************************************
85 template< typename MT1 // Type of the left-hand side dense matrix
86  , bool SO1 // Storage order of the left-hand side matrix
87  , typename MT2 // Type of the right-hand side matrix
88  , bool SO2 > // Storage order of the right-hand side matrix
89 inline void smpAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
90 {
92 
93  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
94  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
95  assign( ~lhs, ~rhs );
96 }
98 //*************************************************************************************************
99 
100 
101 //*************************************************************************************************
118 template< typename MT1 // Type of the left-hand side dense matrix
119  , bool SO // Storage order of the left-hand side dense matrix
120  , typename MT2 > // Type of the right-hand side dense matrix
121 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
122  smpAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
123 {
125 
126  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
127  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
128 
129  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
130  assign( ~lhs, ~rhs );
131  return;
132  }
133 
134  typedef typename MT1::ElementType ET1;
135  typedef typename MT2::ElementType ET2;
136  typedef IntrinsicTrait<typename MT1::ElementType> IT;
137  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
138  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
139 
140  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
141  const bool lhsAligned ( (~lhs).isAligned() );
142  const bool rhsAligned ( (~rhs).isAligned() );
143 
144 #pragma omp parallel shared( lhs, rhs )
145  {
146  const int threads ( omp_get_num_threads() );
147  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
148  const size_t equalShare ( (~lhs).rows() / threads + addon );
149  const size_t rest ( equalShare & ( IT::size - 1UL ) );
150  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
151 
152 #pragma omp for schedule(dynamic,1) nowait
153  for( int i=0UL; i<threads; ++i )
154  {
155  const size_t row( i*rowsPerThread );
156 
157  if( row >= (~lhs).rows() )
158  continue;
159 
160  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
161 
162  if( vectorizable && lhsAligned && rhsAligned ) {
163  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
164  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
165  }
166  else if( vectorizable && lhsAligned ) {
167  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
168  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
169  }
170  else if( vectorizable && rhsAligned ) {
171  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
172  assign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
173  }
174  else {
175  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
176  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
177  }
178  }
179  }
180 }
182 //*************************************************************************************************
183 
184 
185 //*************************************************************************************************
202 template< typename MT1 // Type of the left-hand side dense matrix
203  , bool SO // Storage order of the left-hand side dense matrix
204  , typename MT2 > // Type of the right-hand side dense matrix
205 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
206  smpAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
207 {
209 
210  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
211  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
212 
213  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
214  assign( ~lhs, ~rhs );
215  return;
216  }
217 
218  typedef typename MT1::ElementType ET1;
219  typedef typename MT2::ElementType ET2;
220  typedef IntrinsicTrait<typename MT1::ElementType> IT;
221  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
222  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
223 
224  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
225  const bool lhsAligned ( (~lhs).isAligned() );
226  const bool rhsAligned ( (~rhs).isAligned() );
227 
228 #pragma omp parallel shared( lhs, rhs )
229  {
230  const int threads ( omp_get_num_threads() );
231  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
232  const size_t equalShare ( (~lhs).columns() / threads + addon );
233  const size_t rest ( equalShare & ( IT::size - 1UL ) );
234  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
235 
236 #pragma omp for schedule(dynamic,1) nowait
237  for( int i=0UL; i<threads; ++i )
238  {
239  const size_t column( i*colsPerThread );
240 
241  if( column >= (~lhs).columns() )
242  continue;
243 
244  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
245 
246  if( vectorizable && lhsAligned && rhsAligned ) {
247  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
248  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
249  }
250  else if( vectorizable && lhsAligned ) {
251  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
252  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
253  }
254  else if( vectorizable && rhsAligned ) {
255  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
256  assign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
257  }
258  else {
259  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
260  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
261  }
262  }
263  }
264 }
266 //*************************************************************************************************
267 
268 
269 //*************************************************************************************************
286 template< typename MT1 // Type of the left-hand side dense matrix
287  , bool SO // Storage order of the left-hand side dense matrix
288  , typename MT2 > // Type of the right-hand side sparse matrix
289 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
290  smpAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
291 {
293 
294  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
295  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
296 
297  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
298  assign( ~lhs, ~rhs );
299  return;
300  }
301 
302  typedef typename MT1::ElementType ET1;
303  typedef typename MT2::ElementType ET2;
304  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
305 
306 #pragma omp parallel shared( lhs, rhs )
307  {
308  const int threads ( omp_get_num_threads() );
309  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
310  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
311 
312 #pragma omp for schedule(dynamic,1) nowait
313  for( int i=0UL; i<threads; ++i )
314  {
315  const size_t row( i*rowsPerThread );
316 
317  if( row >= (~lhs).rows() )
318  continue;
319 
320  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
321  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
322  assign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
323  }
324  }
325 }
327 //*************************************************************************************************
328 
329 
330 //*************************************************************************************************
347 template< typename MT1 // Type of the left-hand side dense matrix
348  , bool SO // Storage order of the left-hand side dense matrix
349  , typename MT2 > // Type of the right-hand side sparse matrix
350 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
351  smpAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
352 {
354 
355  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
356  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
357 
358  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
359  assign( ~lhs, ~rhs );
360  return;
361  }
362 
363  typedef typename MT1::ElementType ET1;
364  typedef typename MT2::ElementType ET2;
365  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
366 
367 #pragma omp parallel shared( lhs, rhs )
368  {
369  const int threads ( omp_get_num_threads() );
370  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
371  const size_t colsPerThread( (~lhs).columns() / threads + addon );
372 
373 #pragma omp for schedule(dynamic,1) nowait
374  for( int i=0UL; i<threads; ++i )
375  {
376  const size_t column( i*colsPerThread );
377 
378  if( column >= (~lhs).columns() )
379  continue;
380 
381  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
382  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
383  assign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
384  }
385  }
386 }
388 //*************************************************************************************************
389 
390 
391 //*************************************************************************************************
408 template< typename MT1 // Type of the left-hand side dense matrix
409  , bool SO1 // Storage order of the left-hand side matrix
410  , typename MT2 // Type of the right-hand side matrix
411  , bool SO2 > // Storage order of the right-hand side matrix
412 inline void smpAddAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
413 {
415 
416  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
417  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
418  addAssign( ~lhs, ~rhs );
419 }
421 //*************************************************************************************************
422 
423 
424 //*************************************************************************************************
441 template< typename MT1 // Type of the left-hand side dense matrix
442  , bool SO // Storage order of the left-hand side dense matrix
443  , typename MT2 > // Type of the right-hand side dense matrix
444 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
445  smpAddAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
446 {
448 
449  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
450  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
451 
452  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
453  addAssign( ~lhs, ~rhs );
454  return;
455  }
456 
457  typedef typename MT1::ElementType ET1;
458  typedef typename MT2::ElementType ET2;
459  typedef IntrinsicTrait<typename MT1::ElementType> IT;
460  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
461  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
462 
463  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
464  const bool lhsAligned ( (~lhs).isAligned() );
465  const bool rhsAligned ( (~rhs).isAligned() );
466 
467 #pragma omp parallel shared( lhs, rhs )
468  {
469  const int threads ( omp_get_num_threads() );
470  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
471  const size_t equalShare ( (~lhs).rows() / threads + addon );
472  const size_t rest ( equalShare & ( IT::size - 1UL ) );
473  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
474 
475 #pragma omp for schedule(dynamic,1) nowait
476  for( int i=0UL; i<threads; ++i )
477  {
478  const size_t row( i*rowsPerThread );
479 
480  if( row >= (~lhs).rows() )
481  continue;
482 
483  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
484 
485  if( vectorizable && lhsAligned && rhsAligned ) {
486  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
487  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
488  }
489  else if( vectorizable && lhsAligned ) {
490  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
491  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
492  }
493  else if( vectorizable && rhsAligned ) {
494  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
495  addAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
496  }
497  else {
498  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
499  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
500  }
501  }
502  }
503 }
505 //*************************************************************************************************
506 
507 
508 //*************************************************************************************************
525 template< typename MT1 // Type of the left-hand side dense matrix
526  , bool SO // Storage order of the left-hand side dense matrix
527  , typename MT2 > // Type of the right-hand side dense matrix
528 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
529  smpAddAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
530 {
532 
533  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
534  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
535 
536  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
537  addAssign( ~lhs, ~rhs );
538  return;
539  }
540 
541  typedef typename MT1::ElementType ET1;
542  typedef typename MT2::ElementType ET2;
543  typedef IntrinsicTrait<typename MT1::ElementType> IT;
544  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
545  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
546 
547  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
548  const bool lhsAligned ( (~lhs).isAligned() );
549  const bool rhsAligned ( (~rhs).isAligned() );
550 
551 #pragma omp parallel shared( lhs, rhs )
552  {
553  const int threads ( omp_get_num_threads() );
554  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
555  const size_t equalShare ( (~lhs).columns() / threads + addon );
556  const size_t rest ( equalShare & ( IT::size - 1UL ) );
557  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
558 
559 #pragma omp for schedule(dynamic,1) nowait
560  for( int i=0UL; i<threads; ++i )
561  {
562  const size_t column( i*colsPerThread );
563 
564  if( column >= (~lhs).columns() )
565  continue;
566 
567  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
568 
569  if( vectorizable && lhsAligned && rhsAligned ) {
570  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
571  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
572  }
573  else if( vectorizable && lhsAligned ) {
574  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
575  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
576  }
577  else if( vectorizable && rhsAligned ) {
578  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
579  addAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
580  }
581  else {
582  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
583  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
584  }
585  }
586  }
587 }
589 //*************************************************************************************************
590 
591 
592 //*************************************************************************************************
609 template< typename MT1 // Type of the left-hand side dense matrix
610  , bool SO // Storage order of the left-hand side dense matrix
611  , typename MT2 > // Type of the right-hand side sparse matrix
612 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
613  smpAddAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
614 {
616 
617  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
618  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
619 
620  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
621  addAssign( ~lhs, ~rhs );
622  return;
623  }
624 
625  typedef typename MT1::ElementType ET1;
626  typedef typename MT2::ElementType ET2;
627  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
628 
629 #pragma omp parallel shared( lhs, rhs )
630  {
631  const int threads ( omp_get_num_threads() );
632  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
633  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
634 
635 #pragma omp for schedule(dynamic,1) nowait
636  for( int i=0UL; i<threads; ++i )
637  {
638  const size_t row( i*rowsPerThread );
639 
640  if( row >= (~lhs).rows() )
641  continue;
642 
643  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
644  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
645  addAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
646  }
647  }
648 }
650 //*************************************************************************************************
651 
652 
653 //*************************************************************************************************
670 template< typename MT1 // Type of the left-hand side dense matrix
671  , bool SO // Storage order of the left-hand side dense matrix
672  , typename MT2 > // Type of the right-hand side sparse matrix
673 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
674  smpAddAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
675 {
677 
678  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
679  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
680 
681  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
682  addAssign( ~lhs, ~rhs );
683  return;
684  }
685 
686  typedef typename MT1::ElementType ET1;
687  typedef typename MT2::ElementType ET2;
688  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
689 
690 #pragma omp parallel shared( lhs, rhs )
691  {
692  const int threads ( omp_get_num_threads() );
693  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
694  const size_t colsPerThread( (~lhs).columns() / threads + addon );
695 
696 #pragma omp for schedule(dynamic,1) nowait
697  for( int i=0UL; i<threads; ++i )
698  {
699  const size_t column( i*colsPerThread );
700 
701  if( column >= (~lhs).columns() )
702  continue;
703 
704  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
705  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
706  addAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
707  }
708  }
709 }
711 //*************************************************************************************************
712 
713 
714 //*************************************************************************************************
731 template< typename MT1 // Type of the left-hand side dense matrix
732  , bool SO1 // Storage order of the left-hand side matrix
733  , typename MT2 // Type of the right-hand side matrix
734  , bool SO2 > // Storage order of the right-hand side matrix
735 inline void smpSubAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
736 {
738 
739  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
740  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
741  subAssign( ~lhs, ~rhs );
742 }
744 //*************************************************************************************************
745 
746 
747 //*************************************************************************************************
764 template< typename MT1 // Type of the left-hand side dense matrix
765  , bool SO // Storage order of the left-hand side dense matrix
766  , typename MT2 > // Type of the right-hand side dense matrix
767 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
768  smpSubAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,rowMajor>& rhs )
769 {
771 
772  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
773  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
774 
775  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
776  subAssign( ~lhs, ~rhs );
777  return;
778  }
779 
780  typedef typename MT1::ElementType ET1;
781  typedef typename MT2::ElementType ET2;
782  typedef IntrinsicTrait<typename MT1::ElementType> IT;
783  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
784  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
785 
786  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
787  const bool lhsAligned ( (~lhs).isAligned() );
788  const bool rhsAligned ( (~rhs).isAligned() );
789 
790 #pragma omp parallel shared( lhs, rhs )
791  {
792  const int threads ( omp_get_num_threads() );
793  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
794  const size_t equalShare ( (~lhs).rows() / threads + addon );
795  const size_t rest ( equalShare & ( IT::size - 1UL ) );
796  const size_t rowsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
797 
798 #pragma omp for schedule(dynamic,1) nowait
799  for( int i=0UL; i<threads; ++i )
800  {
801  const size_t row( i*rowsPerThread );
802 
803  if( row >= (~lhs).rows() )
804  continue;
805 
806  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
807 
808  if( vectorizable && lhsAligned && rhsAligned ) {
809  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
810  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
811  }
812  else if( vectorizable && lhsAligned ) {
813  AlignedTarget target( submatrix<aligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
814  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
815  }
816  else if( vectorizable && rhsAligned ) {
817  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
818  subAssign( target, submatrix<aligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
819  }
820  else {
821  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
822  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
823  }
824  }
825  }
826 }
828 //*************************************************************************************************
829 
830 
831 //*************************************************************************************************
848 template< typename MT1 // Type of the left-hand side dense matrix
849  , bool SO // Storage order of the left-hand side dense matrix
850  , typename MT2 > // Type of the right-hand side dense matrix
851 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
852  smpSubAssign( DenseMatrix<MT1,SO>& lhs, const DenseMatrix<MT2,columnMajor>& rhs )
853 {
855 
856  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
857  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
858 
859  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
860  subAssign( ~lhs, ~rhs );
861  return;
862  }
863 
864  typedef typename MT1::ElementType ET1;
865  typedef typename MT2::ElementType ET2;
866  typedef IntrinsicTrait<typename MT1::ElementType> IT;
867  typedef typename SubmatrixExprTrait<MT1,aligned>::Type AlignedTarget;
868  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
869 
870  const bool vectorizable( MT1::vectorizable && MT2::vectorizable && IsSame<ET1,ET2>::value );
871  const bool lhsAligned ( (~lhs).isAligned() );
872  const bool rhsAligned ( (~rhs).isAligned() );
873 
874 #pragma omp parallel shared( lhs, rhs )
875  {
876  const int threads ( omp_get_num_threads() );
877  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
878  const size_t equalShare ( (~lhs).columns() / threads + addon );
879  const size_t rest ( equalShare & ( IT::size - 1UL ) );
880  const size_t colsPerThread( ( vectorizable && rest )?( equalShare - rest + IT::size ):( equalShare ) );
881 
882 #pragma omp for schedule(dynamic,1) nowait
883  for( int i=0UL; i<threads; ++i )
884  {
885  const size_t column( i*colsPerThread );
886 
887  if( column >= (~lhs).columns() )
888  continue;
889 
890  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
891 
892  if( vectorizable && lhsAligned && rhsAligned ) {
893  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
894  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
895  }
896  else if( vectorizable && lhsAligned ) {
897  AlignedTarget target( submatrix<aligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
898  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
899  }
900  else if( vectorizable && rhsAligned ) {
901  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
902  subAssign( target, submatrix<aligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
903  }
904  else {
905  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
906  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
907  }
908  }
909  }
910 }
912 //*************************************************************************************************
913 
914 
915 //*************************************************************************************************
932 template< typename MT1 // Type of the left-hand side dense matrix
933  , bool SO // Storage order of the left-hand side dense matrix
934  , typename MT2 > // Type of the right-hand side sparse matrix
935 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
936  smpSubAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,rowMajor>& rhs )
937 {
939 
940  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
941  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
942 
943  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
944  subAssign( ~lhs, ~rhs );
945  return;
946  }
947 
948  typedef typename MT1::ElementType ET1;
949  typedef typename MT2::ElementType ET2;
950  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
951 
952 #pragma omp parallel shared( lhs, rhs )
953  {
954  const int threads ( omp_get_num_threads() );
955  const size_t addon ( ( ( (~lhs).rows() % threads ) != 0UL )? 1UL : 0UL );
956  const size_t rowsPerThread( (~lhs).rows() / threads + addon );
957 
958 #pragma omp for schedule(dynamic,1) nowait
959  for( int i=0UL; i<threads; ++i )
960  {
961  const size_t row( i*rowsPerThread );
962 
963  if( row >= (~lhs).rows() )
964  continue;
965 
966  const size_t m( min( rowsPerThread, (~lhs).rows() - row ) );
967  UnalignedTarget target( submatrix<unaligned>( ~lhs, row, 0UL, m, (~lhs).columns() ) );
968  subAssign( target, submatrix<unaligned>( ~rhs, row, 0UL, m, (~lhs).columns() ) );
969  }
970  }
971 }
973 //*************************************************************************************************
974 
975 
976 //*************************************************************************************************
993 template< typename MT1 // Type of the left-hand side dense matrix
994  , bool SO // Storage order of the left-hand side dense matrix
995  , typename MT2 > // Type of the right-hand side sparse matrix
996 inline typename EnableIfTrue< MT1::smpAssignable && MT2::smpAssignable >::Type
997  smpSubAssign( DenseMatrix<MT1,SO>& lhs, const SparseMatrix<MT2,columnMajor>& rhs )
998 {
1000 
1001  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1002  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1003 
1004  if( isSerialSectionActive() || omp_get_num_threads() != 1 || !(~rhs).canSMPAssign() ) {
1005  subAssign( ~lhs, ~rhs );
1006  return;
1007  }
1008 
1009  typedef typename MT1::ElementType ET1;
1010  typedef typename MT2::ElementType ET2;
1011  typedef typename SubmatrixExprTrait<MT1,unaligned>::Type UnalignedTarget;
1012 
1013 #pragma omp parallel shared( lhs, rhs )
1014  {
1015  const int threads ( omp_get_num_threads() );
1016  const size_t addon ( ( ( (~lhs).columns() % threads ) != 0UL )? 1UL : 0UL );
1017  const size_t colsPerThread( (~lhs).columns() / threads + addon );
1018 
1019 #pragma omp for schedule(dynamic,1) nowait
1020  for( int i=0UL; i<threads; ++i )
1021  {
1022  const size_t column( i*colsPerThread );
1023 
1024  if( column >= (~lhs).columns() )
1025  continue;
1026 
1027  const size_t n( min( colsPerThread, (~lhs).columns() - column ) );
1028  UnalignedTarget target( submatrix<unaligned>( ~lhs, 0UL, column, (~lhs).rows(), n ) );
1029  subAssign( target, submatrix<unaligned>( ~rhs, 0UL, column, (~lhs).rows(), n ) );
1030  }
1031  }
1032 }
1034 //*************************************************************************************************
1035 
1036 
1037 //*************************************************************************************************
1054 template< typename MT1 // Type of the left-hand side dense matrix
1055  , bool SO1 // Storage order of the left-hand side matrix
1056  , typename MT2 // Type of the right-hand side matrix
1057  , bool SO2 > // Storage order of the right-hand side matrix
1058 inline void smpMultAssign( DenseMatrix<MT1,SO1>& lhs, const Matrix<MT2,SO2>& rhs )
1059 {
1061 
1062  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == (~rhs).rows() , "Invalid number of rows" );
1063  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).columns(), "Invalid number of columns" );
1064  multAssign( ~lhs, ~rhs );
1065 }
1067 //*************************************************************************************************
1068 
1069 
1070 
1071 
1072 //=================================================================================================
1073 //
1074 // COMPILE TIME CONSTRAINT
1075 //
1076 //=================================================================================================
1077 
1078 //*************************************************************************************************
1080 namespace {
1081 
1083 
1084 }
1086 //*************************************************************************************************
1087 
1088 } // namespace blaze
1089 
1090 #endif
Header file for mathematical functions.
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:178
Header file for the IsSame and IsStrictlySame type traits.
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename ColumnExprTrait< MT >::Type >::Type column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:103
Header file for the intrinsic trait.
Header file for the SparseMatrix base class.
Header file for the matrix storage order types.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Header file for the complete DenseSubmatrix implementation.
Compile time assertion.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Header file for the serial section implementation.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the EnableIf class template.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
DisableIf< Or< IsComputation< MT >, IsTransExpr< MT > >, typename RowExprTrait< MT >::Type >::Type row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:103
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:211
Header file for the SubmatrixExprTrait class template.
Header file for run time assertion macros.
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
Header file for the complete SparseSubmatrix implementation.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: OpenMP.h:65
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
System settings for the OpenMP parallelization.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:143
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:138
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the FunctionTrace class.