DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
#include <blaze/math/Aliases.h>
#include <blaze/math/Functions.h>
#include <blaze/math/smp/ParallelSection.h>
#include <blaze/system/SMP.h>
#include <blaze/util/Assert.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/mpl/And.h>
#include <blaze/util/mpl/Not.h>
#include <blaze/util/mpl/Or.h>
#include <blaze/util/StaticAssert.h>
#include <blaze/util/Types.h>
66 
67 
68 namespace blaze {
69 
70 //=================================================================================================
71 //
72 // PLAIN ASSIGNMENT
73 //
74 //=================================================================================================
75 
76 //*************************************************************************************************
92 template< typename VT1 // Type of the left-hand side dense vector
93  , bool TF1 // Transpose flag of the left-hand side dense vector
94  , typename VT2 // Type of the right-hand side dense vector
95  , bool TF2 > // Transpose flag of the right-hand side dense vector
96 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
97 {
99 
100  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
101 
102  typedef ElementType_<VT1> ET1;
103  typedef ElementType_<VT2> ET2;
104  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
105  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
106 
107  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
108 
109  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
110  const bool lhsAligned ( (~lhs).isAligned() );
111  const bool rhsAligned ( (~rhs).isAligned() );
112 
113  const size_t threads ( TheThreadBackend::size() );
114  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
115  const size_t equalShare ( (~lhs).size() / threads + addon );
116  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
117  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
118 
119  for( size_t i=0UL; i<threads; ++i )
120  {
121  const size_t index( i*sizePerThread );
122 
123  if( index >= (~lhs).size() )
124  continue;
125 
126  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
127 
128  if( simdEnabled && lhsAligned && rhsAligned ) {
129  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
130  TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
131  }
132  else if( simdEnabled && lhsAligned ) {
133  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
134  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
135  }
136  else if( simdEnabled && rhsAligned ) {
137  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
138  TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
139  }
140  else {
141  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
142  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
143  }
144  }
145 
146  TheThreadBackend::wait();
147 }
149 //*************************************************************************************************
150 
151 
152 //*************************************************************************************************
169 template< typename VT1 // Type of the left-hand side dense vector
170  , bool TF1 // Transpose flag of the left-hand side dense vector
171  , typename VT2 // Type of the right-hand side sparse vector
172  , bool TF2 > // Transpose flag of the right-hand side sparse vector
173 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
174 {
176 
177  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
178 
179  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
180 
181  const size_t threads ( TheThreadBackend::size() );
182  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
183  const size_t sizePerThread( (~lhs).size() / threads + addon );
184 
185  for( size_t i=0UL; i<threads; ++i )
186  {
187  const size_t index( i*sizePerThread );
188 
189  if( index >= (~lhs).size() )
190  continue;
191 
192  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
193  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
194  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
195  }
196 
197  TheThreadBackend::wait();
198 }
200 //*************************************************************************************************
201 
202 
203 //*************************************************************************************************
221 template< typename VT1 // Type of the left-hand side dense vector
222  , bool TF1 // Transpose flag of the left-hand side dense vector
223  , typename VT2 // Type of the right-hand side vector
224  , bool TF2 > // Transpose flag of the right-hand side vector
225 inline EnableIf_< And< IsDenseVector<VT1>
226  , Or< Not< IsSMPAssignable<VT1> >
227  , Not< IsSMPAssignable<VT2> > > > >
228  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
229 {
231 
232  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
233 
234  assign( ~lhs, ~rhs );
235 }
237 //*************************************************************************************************
238 
239 
240 //*************************************************************************************************
258 template< typename VT1 // Type of the left-hand side dense vector
259  , bool TF1 // Transpose flag of the left-hand side dense vector
260  , typename VT2 // Type of the right-hand side vector
261  , bool TF2 > // Transpose flag of the right-hand side vector
262 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
263  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
264 {
266 
267  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
268  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
269 
270  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
271 
273  {
274  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
275  assign( ~lhs, ~rhs );
276  }
277  else {
278  smpAssign_backend( ~lhs, ~rhs );
279  }
280  }
281 }
283 //*************************************************************************************************
284 
285 
286 
287 
288 //=================================================================================================
289 //
290 // ADDITION ASSIGNMENT
291 //
292 //=================================================================================================
293 
294 //*************************************************************************************************
311 template< typename VT1 // Type of the left-hand side dense vector
312  , bool TF1 // Transpose flag of the left-hand side dense vector
313  , typename VT2 // Type of the right-hand side dense vector
314  , bool TF2 > // Transpose flag of the right-hand side dense vector
315 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
316 {
318 
319  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
320 
321  typedef ElementType_<VT1> ET1;
322  typedef ElementType_<VT2> ET2;
323  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
324  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
325 
326  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
327 
328  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
329  const bool lhsAligned ( (~lhs).isAligned() );
330  const bool rhsAligned ( (~rhs).isAligned() );
331 
332  const size_t threads ( TheThreadBackend::size() );
333  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
334  const size_t equalShare ( (~lhs).size() / threads + addon );
335  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
336  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
337 
338  for( size_t i=0UL; i<threads; ++i )
339  {
340  const size_t index( i*sizePerThread );
341 
342  if( index >= (~lhs).size() )
343  continue;
344 
345  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
346 
347  if( simdEnabled && lhsAligned && rhsAligned ) {
348  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
349  TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
350  }
351  else if( simdEnabled && lhsAligned ) {
352  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
353  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
354  }
355  else if( simdEnabled && rhsAligned ) {
356  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
357  TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
358  }
359  else {
360  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
361  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
362  }
363  }
364 
365  TheThreadBackend::wait();
366 }
368 //*************************************************************************************************
369 
370 
371 //*************************************************************************************************
388 template< typename VT1 // Type of the left-hand side dense vector
389  , bool TF1 // Transpose flag of the left-hand side dense vector
390  , typename VT2 // Type of the right-hand side sparse vector
391  , bool TF2 > // Transpose flag of the right-hand side sparse vector
392 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
393 {
395 
396  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
397 
398  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
399 
400  const size_t threads ( TheThreadBackend::size() );
401  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
402  const size_t sizePerThread( (~lhs).size() / threads + addon );
403 
404  for( size_t i=0UL; i<threads; ++i )
405  {
406  const size_t index( i*sizePerThread );
407 
408  if( index >= (~lhs).size() )
409  continue;
410 
411  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
412  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
413  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
414  }
415 
416  TheThreadBackend::wait();
417 }
419 //*************************************************************************************************
420 
421 
422 //*************************************************************************************************
441 template< typename VT1 // Type of the left-hand side dense vector
442  , bool TF1 // Transpose flag of the left-hand side dense vector
443  , typename VT2 // Type of the right-hand side vector
444  , bool TF2 > // Transpose flag of the right-hand side vector
445 inline EnableIf_< And< IsDenseVector<VT1>
446  , Or< Not< IsSMPAssignable<VT1> >
447  , Not< IsSMPAssignable<VT2> > > > >
448  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
449 {
451 
452  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
453 
454  addAssign( ~lhs, ~rhs );
455 }
457 //*************************************************************************************************
458 
459 
460 //*************************************************************************************************
478 template< typename VT1 // Type of the left-hand side dense vector
479  , bool TF1 // Transpose flag of the left-hand side dense vector
480  , typename VT2 // Type of the right-hand side vector
481  , bool TF2 > // Transpose flag of the right-hand side vector
482 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
483  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
484 {
486 
487  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
488  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
489 
490  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
491 
493  {
494  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
495  addAssign( ~lhs, ~rhs );
496  }
497  else {
498  smpAddAssign_backend( ~lhs, ~rhs );
499  }
500  }
501 }
503 //*************************************************************************************************
504 
505 
506 
507 
508 //=================================================================================================
509 //
510 // SUBTRACTION ASSIGNMENT
511 //
512 //=================================================================================================
513 
514 //*************************************************************************************************
531 template< typename VT1 // Type of the left-hand side dense vector
532  , bool TF1 // Transpose flag of the left-hand side dense vector
533  , typename VT2 // Type of the right-hand side dense vector
534  , bool TF2 > // Transpose flag of the right-hand side dense vector
535 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
536 {
538 
539  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
540 
541  typedef ElementType_<VT1> ET1;
542  typedef ElementType_<VT2> ET2;
543  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
544  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
545 
546  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
547 
548  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
549  const bool lhsAligned ( (~lhs).isAligned() );
550  const bool rhsAligned ( (~rhs).isAligned() );
551 
552  const size_t threads ( TheThreadBackend::size() );
553  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
554  const size_t equalShare ( (~lhs).size() / threads + addon );
555  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
556  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
557 
558  for( size_t i=0UL; i<threads; ++i )
559  {
560  const size_t index( i*sizePerThread );
561 
562  if( index >= (~lhs).size() )
563  continue;
564 
565  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
566 
567  if( simdEnabled && lhsAligned && rhsAligned ) {
568  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
569  TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
570  }
571  else if( simdEnabled && lhsAligned ) {
572  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
573  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
574  }
575  else if( simdEnabled && rhsAligned ) {
576  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
577  TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
578  }
579  else {
580  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
581  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
582  }
583  }
584 
585  TheThreadBackend::wait();
586 }
588 //*************************************************************************************************
589 
590 
591 //*************************************************************************************************
608 template< typename VT1 // Type of the left-hand side dense vector
609  , bool TF1 // Transpose flag of the left-hand side dense vector
610  , typename VT2 // Type of the right-hand side sparse vector
611  , bool TF2 > // Transpose flag of the right-hand side sparse vector
612 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
613 {
615 
616  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
617 
618  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
619 
620  const size_t threads ( TheThreadBackend::size() );
621  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
622  const size_t sizePerThread( (~lhs).size() / threads + addon );
623 
624  for( size_t i=0UL; i<threads; ++i )
625  {
626  const size_t index( i*sizePerThread );
627 
628  if( index >= (~lhs).size() )
629  continue;
630 
631  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
632  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
633  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
634  }
635 
636  TheThreadBackend::wait();
637 }
639 //*************************************************************************************************
640 
641 
642 //*************************************************************************************************
661 template< typename VT1 // Type of the left-hand side dense vector
662  , bool TF1 // Transpose flag of the left-hand side dense vector
663  , typename VT2 // Type of the right-hand side vector
664  , bool TF2 > // Transpose flag of the right-hand side vector
665 inline EnableIf_< And< IsDenseVector<VT1>
666  , Or< Not< IsSMPAssignable<VT1> >
667  , Not< IsSMPAssignable<VT2> > > > >
668  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
669 {
671 
672  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
673 
674  subAssign( ~lhs, ~rhs );
675 }
677 //*************************************************************************************************
678 
679 
680 //*************************************************************************************************
699 template< typename VT1 // Type of the left-hand side dense vector
700  , bool TF1 // Transpose flag of the left-hand side dense vector
701  , typename VT2 // Type of the right-hand side vector
702  , bool TF2 > // Transpose flag of the right-hand side vector
703 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
704  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
705 {
707 
708  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
709  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
710 
711  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
712 
714  {
715  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
716  subAssign( ~lhs, ~rhs );
717  }
718  else {
719  smpSubAssign_backend( ~lhs, ~rhs );
720  }
721  }
722 }
724 //*************************************************************************************************
725 
726 
727 
728 
729 //=================================================================================================
730 //
731 // MULTIPLICATION ASSIGNMENT
732 //
733 //=================================================================================================
734 
735 //*************************************************************************************************
752 template< typename VT1 // Type of the left-hand side dense vector
753  , bool TF1 // Transpose flag of the left-hand side dense vector
754  , typename VT2 // Type of the right-hand side dense vector
755  , bool TF2 > // Transpose flag of the right-hand side dense vector
756 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
757 {
759 
760  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
761 
762  typedef ElementType_<VT1> ET1;
763  typedef ElementType_<VT2> ET2;
764  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
765  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
766 
767  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
768 
769  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
770  const bool lhsAligned ( (~lhs).isAligned() );
771  const bool rhsAligned ( (~rhs).isAligned() );
772 
773  const size_t threads ( TheThreadBackend::size() );
774  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
775  const size_t equalShare ( (~lhs).size() / threads + addon );
776  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
777  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
778 
779  for( size_t i=0UL; i<threads; ++i )
780  {
781  const size_t index( i*sizePerThread );
782 
783  if( index >= (~lhs).size() )
784  continue;
785 
786  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
787 
788  if( simdEnabled && lhsAligned && rhsAligned ) {
789  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
790  TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
791  }
792  else if( simdEnabled && lhsAligned ) {
793  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
794  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
795  }
796  else if( simdEnabled && rhsAligned ) {
797  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
798  TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
799  }
800  else {
801  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
802  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
803  }
804  }
805 
806  TheThreadBackend::wait();
807 }
809 //*************************************************************************************************
810 
811 
812 //*************************************************************************************************
829 template< typename VT1 // Type of the left-hand side dense vector
830  , bool TF1 // Transpose flag of the left-hand side dense vector
831  , typename VT2 // Type of the right-hand side sparse vector
832  , bool TF2 > // Transpose flag of the right-hand side sparse vector
833 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
834 {
836 
837  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
838 
839  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
840 
841  const size_t threads ( TheThreadBackend::size() );
842  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
843  const size_t sizePerThread( (~lhs).size() / threads + addon );
844 
845  for( size_t i=0UL; i<threads; ++i )
846  {
847  const size_t index( i*sizePerThread );
848 
849  if( index >= (~lhs).size() )
850  continue;
851 
852  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
853  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
854  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
855  }
856 
857  TheThreadBackend::wait();
858 }
860 //*************************************************************************************************
861 
862 
863 //*************************************************************************************************
882 template< typename VT1 // Type of the left-hand side dense vector
883  , bool TF1 // Transpose flag of the left-hand side dense vector
884  , typename VT2 // Type of the right-hand side vector
885  , bool TF2 > // Transpose flag of the right-hand side vector
886 inline EnableIf_< And< IsDenseVector<VT1>
887  , Or< Not< IsSMPAssignable<VT1> >
888  , Not< IsSMPAssignable<VT2> > > > >
889  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
890 {
892 
893  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
894 
895  multAssign( ~lhs, ~rhs );
896 }
898 //*************************************************************************************************
899 
900 
901 //*************************************************************************************************
920 template< typename VT1 // Type of the left-hand side dense vector
921  , bool TF1 // Transpose flag of the left-hand side dense vector
922  , typename VT2 // Type of the right-hand side vector
923  , bool TF2 > // Transpose flag of the right-hand side vector
924 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
925  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
926 {
928 
929  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
930  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
931 
932  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
933 
935  {
936  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
937  multAssign( ~lhs, ~rhs );
938  }
939  else {
940  smpMultAssign_backend( ~lhs, ~rhs );
941  }
942  }
943 }
945 //*************************************************************************************************
946 
947 
948 
949 
950 //=================================================================================================
951 //
952 // DIVISION ASSIGNMENT
953 //
954 //=================================================================================================
955 
956 //*************************************************************************************************
973 template< typename VT1 // Type of the left-hand side dense vector
974  , bool TF1 // Transpose flag of the left-hand side dense vector
975  , typename VT2 // Type of the right-hand side dense vector
976  , bool TF2 > // Transpose flag of the right-hand side dense vector
977 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
978 {
980 
981  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
982 
983  typedef ElementType_<VT1> ET1;
984  typedef ElementType_<VT2> ET2;
985  typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
986  typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
987 
988  enum : size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >::size };
989 
990  const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
991  const bool lhsAligned ( (~lhs).isAligned() );
992  const bool rhsAligned ( (~rhs).isAligned() );
993 
994  const size_t threads ( TheThreadBackend::size() );
995  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
996  const size_t equalShare ( (~lhs).size() / threads + addon );
997  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
998  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
999 
1000  for( size_t i=0UL; i<threads; ++i )
1001  {
1002  const size_t index( i*sizePerThread );
1003 
1004  if( index >= (~lhs).size() )
1005  continue;
1006 
1007  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
1008 
1009  if( simdEnabled && lhsAligned && rhsAligned ) {
1010  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
1011  TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
1012  }
1013  else if( simdEnabled && lhsAligned ) {
1014  AlignedTarget target( subvector<aligned>( ~lhs, index, size ) );
1015  TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
1016  }
1017  else if( simdEnabled && rhsAligned ) {
1018  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
1019  TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
1020  }
1021  else {
1022  UnalignedTarget target( subvector<unaligned>( ~lhs, index, size ) );
1023  TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
1024  }
1025  }
1026 
1027  TheThreadBackend::wait();
1028 }
1030 //*************************************************************************************************
1031 
1032 
1033 //*************************************************************************************************
1052 template< typename VT1 // Type of the left-hand side dense vector
1053  , bool TF1 // Transpose flag of the left-hand side dense vector
1054  , typename VT2 // Type of the right-hand side vector
1055  , bool TF2 > // Transpose flag of the right-hand side vector
1056 inline EnableIf_< And< IsDenseVector<VT1>
1057  , Or< Not< IsSMPAssignable<VT1> >
1058  , Not< IsSMPAssignable<VT2> > > > >
1059  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1060 {
1062 
1063  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1064 
1065  divAssign( ~lhs, ~rhs );
1066 }
1068 //*************************************************************************************************
1069 
1070 
1071 //*************************************************************************************************
1089 template< typename VT1 // Type of the left-hand side dense vector
1090  , bool TF1 // Transpose flag of the left-hand side dense vector
1091  , typename VT2 // Type of the right-hand side vector
1092  , bool TF2 > // Transpose flag of the right-hand side vector
1093 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1094  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1095 {
1097 
1098  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
1099  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
1100 
1101  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1102 
1104  {
1105  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1106  divAssign( ~lhs, ~rhs );
1107  }
1108  else {
1109  smpDivAssign_backend( ~lhs, ~rhs );
1110  }
1111  }
1112 }
1114 //*************************************************************************************************
1115 
1116 
1117 
1118 
1119 //=================================================================================================
1120 //
1121 // COMPILE TIME CONSTRAINTS
1122 //
1123 //=================================================================================================
1124 
1125 //*************************************************************************************************
1127 namespace {
1128 
1130 
1131 }
1133 //*************************************************************************************************
1134 
1135 } // namespace blaze
1136 
1137 #endif
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
Header file for the IsSame and IsStrictlySame type traits.
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the C++11 and Boost thread backend.
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.