DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
#include <omp.h>
#include <blaze/math/Aliases.h>
#include <blaze/math/Functions.h>
#include <blaze/math/smp/ParallelSection.h>
#include <blaze/system/SMP.h>
#include <blaze/util/Assert.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/mpl/And.h>
#include <blaze/util/mpl/Not.h>
#include <blaze/util/mpl/Or.h>
#include <blaze/util/StaticAssert.h>
#include <blaze/util/Types.h>
66 
67 
68 namespace blaze {
69 
70 //=================================================================================================
71 //
72 // PLAIN ASSIGNMENT
73 //
74 //=================================================================================================
75 
76 //*************************************************************************************************
92 template< typename VT1 // Type of the left-hand side dense vector
93  , bool TF1 // Transpose flag of the left-hand side dense vector
94  , typename VT2 // Type of the right-hand side dense vector
95  , bool TF2 > // Transpose flag of the right-hand side dense vector
96 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
97 {
99 
100  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
101 
102  typedef ElementType_<VT1> ET1;
103  typedef ElementType_<VT2> ET2;
104  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
105  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
106 
107  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
108 
109  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
110  const bool lhsAligned ( (~lhs).isAligned() );
111  const bool rhsAligned ( (~rhs).isAligned() );
112 
113  const int threads ( omp_get_num_threads() );
114  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
115  const size_t equalShare ( (~lhs).size() / threads + addon );
116  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
117  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
118 
119 #pragma omp for schedule(dynamic,1) nowait
120  for( int i=0UL; i<threads; ++i )
121  {
122  const size_t index( i*sizePerThread );
123 
124  if( index >= (~lhs).size() )
125  continue;
126 
127  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
128 
129  if( simdEnabled && lhsAligned && rhsAligned ) {
130  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
131  assign( target, subvector<aligned>( ~rhs, index, size ) );
132  }
133  else if( simdEnabled && lhsAligned ) {
134  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
135  assign( target, subvector<unaligned>( ~rhs, index, size ) );
136  }
137  else if( simdEnabled && rhsAligned ) {
138  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
139  assign( target, subvector<aligned>( ~rhs, index, size ) );
140  }
141  else {
142  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
143  assign( target, subvector<unaligned>( ~rhs, index, size ) );
144  }
145  }
146 }
148 //*************************************************************************************************
149 
150 
151 //*************************************************************************************************
167 template< typename VT1 // Type of the left-hand side dense vector
168  , bool TF1 // Transpose flag of the left-hand side dense vector
169  , typename VT2 // Type of the right-hand side sparse vector
170  , bool TF2 > // Transpose flag of the right-hand side sparse vector
171 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
172 {
174 
175  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
176 
177  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
178 
179  const int threads ( omp_get_num_threads() );
180  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
181  const size_t sizePerThread( (~lhs).size() / threads + addon );
182 
183 #pragma omp for schedule(dynamic,1) nowait
184  for( int i=0UL; i<threads; ++i )
185  {
186  const size_t index( i*sizePerThread );
187 
188  if( index >= (~lhs).size() )
189  continue;
190 
191  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
192  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
193  assign( target, subvector<unaligned>( ~rhs, index, size ) );
194  }
195 }
197 //*************************************************************************************************
198 
199 
200 //*************************************************************************************************
218 template< typename VT1 // Type of the left-hand side dense vector
219  , bool TF1 // Transpose flag of the left-hand side dense vector
220  , typename VT2 // Type of the right-hand side vector
221  , bool TF2 > // Transpose flag of the right-hand side vector
222 inline EnableIf_< And< IsDenseVector<VT1>
223  , Or< Not< IsSMPAssignable<VT1> >
224  , Not< IsSMPAssignable<VT2> > > > >
225  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
226 {
228 
229  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
230 
231  assign( ~lhs, ~rhs );
232 }
234 //*************************************************************************************************
235 
236 
237 //*************************************************************************************************
255 template< typename VT1 // Type of the left-hand side dense vector
256  , bool TF1 // Transpose flag of the left-hand side dense vector
257  , typename VT2 // Type of the right-hand side vector
258  , bool TF2 > // Transpose flag of the right-hand side vector
259 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
260  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
261 {
263 
264  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
265  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
266 
267  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
268 
270  {
271  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
272  assign( ~lhs, ~rhs );
273  }
274  else {
275 #pragma omp parallel shared( lhs, rhs )
276  smpAssign_backend( ~lhs, ~rhs );
277  }
278  }
279 }
281 //*************************************************************************************************
282 
283 
284 
285 
286 //=================================================================================================
287 //
288 // ADDITION ASSIGNMENT
289 //
290 //=================================================================================================
291 
292 //*************************************************************************************************
308 template< typename VT1 // Type of the left-hand side dense vector
309  , bool TF1 // Transpose flag of the left-hand side dense vector
310  , typename VT2 // Type of the right-hand side dense vector
311  , bool TF2 > // Transpose flag of the right-hand side dense vector
312 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
313 {
315 
316  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
317 
318  typedef ElementType_<VT1> ET1;
319  typedef ElementType_<VT2> ET2;
320  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
321  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
322 
323  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
324 
325  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
326  const bool lhsAligned ( (~lhs).isAligned() );
327  const bool rhsAligned ( (~rhs).isAligned() );
328 
329  const int threads ( omp_get_num_threads() );
330  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
331  const size_t equalShare ( (~lhs).size() / threads + addon );
332  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
333  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
334 
335 #pragma omp for schedule(dynamic,1) nowait
336  for( int i=0UL; i<threads; ++i )
337  {
338  const size_t index( i*sizePerThread );
339 
340  if( index >= (~lhs).size() )
341  continue;
342 
343  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
344 
345  if( simdEnabled && lhsAligned && rhsAligned ) {
346  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
347  addAssign( target, subvector<aligned>( ~rhs, index, size ) );
348  }
349  else if( simdEnabled && lhsAligned ) {
350  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
351  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
352  }
353  else if( simdEnabled && rhsAligned ) {
354  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
355  addAssign( target, subvector<aligned>( ~rhs, index, size ) );
356  }
357  else {
358  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
359  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
360  }
361  }
362 }
364 //*************************************************************************************************
365 
366 
367 //*************************************************************************************************
383 template< typename VT1 // Type of the left-hand side dense vector
384  , bool TF1 // Transpose flag of the left-hand side dense vector
385  , typename VT2 // Type of the right-hand side sparse vector
386  , bool TF2 > // Transpose flag of the right-hand side sparse vector
387 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
388 {
390 
391  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
392 
393  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
394 
395  const int threads ( omp_get_num_threads() );
396  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
397  const size_t sizePerThread( (~lhs).size() / threads + addon );
398 
399 #pragma omp for schedule(dynamic,1) nowait
400  for( int i=0UL; i<threads; ++i )
401  {
402  const size_t index( i*sizePerThread );
403 
404  if( index >= (~lhs).size() )
405  continue;
406 
407  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
408  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
409  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
410  }
411 }
413 //*************************************************************************************************
414 
415 
416 //*************************************************************************************************
434 template< typename VT1 // Type of the left-hand side dense vector
435  , bool TF1 // Transpose flag of the left-hand side dense vector
436  , typename VT2 // Type of the right-hand side vector
437  , bool TF2 > // Transpose flag of the right-hand side vector
438 inline EnableIf_< And< IsDenseVector<VT1>
439  , Or< Not< IsSMPAssignable<VT1> >
440  , Not< IsSMPAssignable<VT2> > > > >
441  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
442 {
444 
445  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
446 
447  addAssign( ~lhs, ~rhs );
448 }
450 //*************************************************************************************************
451 
452 
453 //*************************************************************************************************
471 template< typename VT1 // Type of the left-hand side dense vector
472  , bool TF1 // Transpose flag of the left-hand side dense vector
473  , typename VT2 // Type of the right-hand side vector
474  , bool TF2 > // Transpose flag of the right-hand side vector
475 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
476  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
477 {
479 
480  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
481  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
482 
483  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
484 
486  {
487  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
488  addAssign( ~lhs, ~rhs );
489  }
490  else {
491 #pragma omp parallel shared( lhs, rhs )
492  smpAddAssign_backend( ~lhs, ~rhs );
493  }
494  }
495 }
497 //*************************************************************************************************
498 
499 
500 
501 
502 //=================================================================================================
503 //
504 // SUBTRACTION ASSIGNMENT
505 //
506 //=================================================================================================
507 
508 //*************************************************************************************************
524 template< typename VT1 // Type of the left-hand side dense vector
525  , bool TF1 // Transpose flag of the left-hand side dense vector
526  , typename VT2 // Type of the right-hand side dense vector
527  , bool TF2 > // Transpose flag of the right-hand side dense vector
528 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
529 {
531 
532  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
533 
534  typedef ElementType_<VT1> ET1;
535  typedef ElementType_<VT2> ET2;
536  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
537  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
538 
539  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
540 
541  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
542  const bool lhsAligned ( (~lhs).isAligned() );
543  const bool rhsAligned ( (~rhs).isAligned() );
544 
545  const int threads ( omp_get_num_threads() );
546  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
547  const size_t equalShare ( (~lhs).size() / threads + addon );
548  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
549  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
550 
551 #pragma omp for schedule(dynamic,1) nowait
552  for( int i=0UL; i<threads; ++i )
553  {
554  const size_t index( i*sizePerThread );
555 
556  if( index >= (~lhs).size() )
557  continue;
558 
559  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
560 
561  if( simdEnabled && lhsAligned && rhsAligned ) {
562  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
563  subAssign( target, subvector<aligned>( ~rhs, index, size ) );
564  }
565  else if( simdEnabled && lhsAligned ) {
566  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
567  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
568  }
569  else if( simdEnabled && rhsAligned ) {
570  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
571  subAssign( target, subvector<aligned>( ~rhs, index, size ) );
572  }
573  else {
574  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
575  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
576  }
577  }
578 }
580 //*************************************************************************************************
581 
582 
583 //*************************************************************************************************
599 template< typename VT1 // Type of the left-hand side dense vector
600  , bool TF1 // Transpose flag of the left-hand side dense vector
601  , typename VT2 // Type of the right-hand side sparse vector
602  , bool TF2 > // Transpose flag of the right-hand side sparse vector
603 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
604 {
606 
607  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
608 
609  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
610 
611  const int threads ( omp_get_num_threads() );
612  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
613  const size_t sizePerThread( (~lhs).size() / threads + addon );
614 
615 #pragma omp for schedule(dynamic,1) nowait
616  for( int i=0UL; i<threads; ++i )
617  {
618  const size_t index( i*sizePerThread );
619 
620  if( index >= (~lhs).size() )
621  continue;
622 
623  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
624  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
625  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
626  }
627 }
629 //*************************************************************************************************
630 
631 
632 //*************************************************************************************************
650 template< typename VT1 // Type of the left-hand side dense vector
651  , bool TF1 // Transpose flag of the left-hand side dense vector
652  , typename VT2 // Type of the right-hand side vector
653  , bool TF2 > // Transpose flag of the right-hand side vector
654 inline EnableIf_< And< IsDenseVector<VT1>
655  , Or< Not< IsSMPAssignable<VT1> >
656  , Not< IsSMPAssignable<VT2> > > > >
657  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
658 {
660 
661  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
662 
663  subAssign( ~lhs, ~rhs );
664 }
666 //*************************************************************************************************
667 
668 
669 //*************************************************************************************************
687 template< typename VT1 // Type of the left-hand side dense vector
688  , bool TF1 // Transpose flag of the left-hand side dense vector
689  , typename VT2 // Type of the right-hand side vector
690  , bool TF2 > // Transpose flag of the right-hand side vector
691 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
692  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
693 {
695 
696  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
697  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
698 
699  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
700 
702  {
703  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
704  subAssign( ~lhs, ~rhs );
705  }
706  else {
707 #pragma omp parallel shared( lhs, rhs )
708  smpSubAssign_backend( ~lhs, ~rhs );
709  }
710  }
711 }
713 //*************************************************************************************************
714 
715 
716 
717 
718 //=================================================================================================
719 //
720 // MULTIPLICATION ASSIGNMENT
721 //
722 //=================================================================================================
723 
724 //*************************************************************************************************
741 template< typename VT1 // Type of the left-hand side dense vector
742  , bool TF1 // Transpose flag of the left-hand side dense vector
743  , typename VT2 // Type of the right-hand side dense vector
744  , bool TF2 > // Transpose flag of the right-hand side dense vector
745 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
746 {
748 
749  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
750 
751  typedef ElementType_<VT1> ET1;
752  typedef ElementType_<VT2> ET2;
753  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
754  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
755 
756  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
757 
758  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
759  const bool lhsAligned ( (~lhs).isAligned() );
760  const bool rhsAligned ( (~rhs).isAligned() );
761 
762  const int threads ( omp_get_num_threads() );
763  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
764  const size_t equalShare ( (~lhs).size() / threads + addon );
765  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
766  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
767 
768 #pragma omp for schedule(dynamic,1) nowait
769  for( int i=0UL; i<threads; ++i )
770  {
771  const size_t index( i*sizePerThread );
772 
773  if( index >= (~lhs).size() )
774  continue;
775 
776  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
777 
778  if( simdEnabled && lhsAligned && rhsAligned ) {
779  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
780  multAssign( target, subvector<aligned>( ~rhs, index, size ) );
781  }
782  else if( simdEnabled && lhsAligned ) {
783  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
784  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
785  }
786  else if( simdEnabled && rhsAligned ) {
787  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
788  multAssign( target, subvector<aligned>( ~rhs, index, size ) );
789  }
790  else {
791  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
792  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
793  }
794  }
795 }
797 //*************************************************************************************************
798 
799 
800 //*************************************************************************************************
817 template< typename VT1 // Type of the left-hand side dense vector
818  , bool TF1 // Transpose flag of the left-hand side dense vector
819  , typename VT2 // Type of the right-hand side sparse vector
820  , bool TF2 > // Transpose flag of the right-hand side sparse vector
821 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
822 {
824 
825  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
826 
827  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
828 
829  const int threads ( omp_get_num_threads() );
830  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
831  const size_t sizePerThread( (~lhs).size() / threads + addon );
832 
833 #pragma omp for schedule(dynamic,1) nowait
834  for( int i=0UL; i<threads; ++i )
835  {
836  const size_t index( i*sizePerThread );
837 
838  if( index >= (~lhs).size() )
839  continue;
840 
841  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
842  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
843  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
844  }
845 }
847 //*************************************************************************************************
848 
849 
850 //*************************************************************************************************
868 template< typename VT1 // Type of the left-hand side dense vector
869  , bool TF1 // Transpose flag of the left-hand side dense vector
870  , typename VT2 // Type of the right-hand side vector
871  , bool TF2 > // Transpose flag of the right-hand side vector
872 inline EnableIf_< And< IsDenseVector<VT1>
873  , Or< Not< IsSMPAssignable<VT1> >
874  , Not< IsSMPAssignable<VT2> > > > >
875  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
876 {
878 
879  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
880 
881  multAssign( ~lhs, ~rhs );
882 }
884 //*************************************************************************************************
885 
886 
887 //*************************************************************************************************
905 template< typename VT1 // Type of the left-hand side dense vector
906  , bool TF1 // Transpose flag of the left-hand side dense vector
907  , typename VT2 // Type of the right-hand side vector
908  , bool TF2 > // Transpose flag of the right-hand side vector
909 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
910  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
911 {
913 
914  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
915  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
916 
917  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
918 
920  {
921  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
922  multAssign( ~lhs, ~rhs );
923  }
924  else {
925 #pragma omp parallel shared( lhs, rhs )
926  smpMultAssign_backend( ~lhs, ~rhs );
927  }
928  }
929 }
931 //*************************************************************************************************
932 
933 
934 
935 
936 //=================================================================================================
937 //
938 // DIVISION ASSIGNMENT
939 //
940 //=================================================================================================
941 
942 //*************************************************************************************************
958 template< typename VT1 // Type of the left-hand side dense vector
959  , bool TF1 // Transpose flag of the left-hand side dense vector
960  , typename VT2 // Type of the right-hand side dense vector
961  , bool TF2 > // Transpose flag of the right-hand side dense vector
962 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
963 {
965 
966  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
967 
968  typedef ElementType_<VT1> ET1;
969  typedef ElementType_<VT2> ET2;
970  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
971  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
972 
973  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
974 
975  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
976  const bool lhsAligned ( (~lhs).isAligned() );
977  const bool rhsAligned ( (~rhs).isAligned() );
978 
979  const int threads ( omp_get_num_threads() );
980  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
981  const size_t equalShare ( (~lhs).size() / threads + addon );
982  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
983  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
984 
985 #pragma omp for schedule(dynamic,1) nowait
986  for( int i=0UL; i<threads; ++i )
987  {
988  const size_t index( i*sizePerThread );
989 
990  if( index >= (~lhs).size() )
991  continue;
992 
993  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
994 
995  if( simdEnabled && lhsAligned && rhsAligned ) {
996  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
997  divAssign( target, subvector<aligned>( ~rhs, index, size ) );
998  }
999  else if( simdEnabled && lhsAligned ) {
1000  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
1001  divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
1002  }
1003  else if( simdEnabled && rhsAligned ) {
1004  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
1005  divAssign( target, subvector<aligned>( ~rhs, index, size ) );
1006  }
1007  else {
1008  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
1009  divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
1010  }
1011  }
1012 }
1014 //*************************************************************************************************
1015 
1016 
1017 //*************************************************************************************************
1035 template< typename VT1 // Type of the left-hand side dense vector
1036  , bool TF1 // Transpose flag of the left-hand side dense vector
1037  , typename VT2 // Type of the right-hand side vector
1038  , bool TF2 > // Transpose flag of the right-hand side vector
1039 inline EnableIf_< And< IsDenseVector<VT1>
1040  , Or< Not< IsSMPAssignable<VT1> >
1041  , Not< IsSMPAssignable<VT2> > > > >
1042  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1043 {
1045 
1046  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1047 
1048  divAssign( ~lhs, ~rhs );
1049 }
1051 //*************************************************************************************************
1052 
1053 
1054 //*************************************************************************************************
1072 template< typename VT1 // Type of the left-hand side dense vector
1073  , bool TF1 // Transpose flag of the left-hand side dense vector
1074  , typename VT2 // Type of the right-hand side vector
1075  , bool TF2 > // Transpose flag of the right-hand side vector
1076 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1077  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1078 {
1080 
1081  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
1082  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
1083 
1084  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1085 
1087  {
1088  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1089  divAssign( ~lhs, ~rhs );
1090  }
1091  else {
1092 #pragma omp parallel shared( lhs, rhs )
1093  smpDivAssign_backend( ~lhs, ~rhs );
1094  }
1095  }
1096 }
1098 //*************************************************************************************************
1099 
1100 
1101 
1102 
1103 //=================================================================================================
1104 //
1105 // COMPILE TIME CONSTRAINTS
1106 //
1107 //=================================================================================================
1108 
1109 //*************************************************************************************************
1111 namespace {
1112 
1114 
1115 }
1117 //*************************************************************************************************
1118 
1119 } // namespace blaze
1120 
1121 #endif
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
Header file for the IsSame and IsStrictlySame type traits.
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.