DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_THREADS_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
55 #include <blaze/system/SMP.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/mpl/And.h>
61 #include <blaze/util/mpl/Not.h>
62 #include <blaze/util/mpl/Or.h>
64 #include <blaze/util/Types.h>
65 
66 
67 namespace blaze {
68 
69 //=================================================================================================
70 //
71 // PLAIN ASSIGNMENT
72 //
73 //=================================================================================================
74 
75 //*************************************************************************************************
91 template< typename VT1 // Type of the left-hand side dense vector
92  , bool TF1 // Transpose flag of the left-hand side dense vector
93  , typename VT2 // Type of the right-hand side dense vector
94  , bool TF2 > // Transpose flag of the right-hand side dense vector
95 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
96 {
98 
99  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
100 
101  using ET1 = ElementType_<VT1>;
102  using ET2 = ElementType_<VT2>;
103 
104  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
105  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
106 
107  const bool lhsAligned( (~lhs).isAligned() );
108  const bool rhsAligned( (~rhs).isAligned() );
109 
110  const size_t threads ( TheThreadBackend::size() );
111  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
112  const size_t equalShare ( (~lhs).size() / threads + addon );
113  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
114  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
115 
116  for( size_t i=0UL; i<threads; ++i )
117  {
118  const size_t index( i*sizePerThread );
119 
120  if( index >= (~lhs).size() )
121  continue;
122 
123  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
124 
125  if( simdEnabled && lhsAligned && rhsAligned ) {
126  auto target( subvector<aligned>( ~lhs, index, size ) );
127  TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
128  }
129  else if( simdEnabled && lhsAligned ) {
130  auto target( subvector<aligned>( ~lhs, index, size ) );
131  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
132  }
133  else if( simdEnabled && rhsAligned ) {
134  auto target( subvector<unaligned>( ~lhs, index, size ) );
135  TheThreadBackend::scheduleAssign( target, subvector<aligned>( ~rhs, index, size ) );
136  }
137  else {
138  auto target( subvector<unaligned>( ~lhs, index, size ) );
139  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
140  }
141  }
142 
143  TheThreadBackend::wait();
144 }
146 //*************************************************************************************************
147 
148 
149 //*************************************************************************************************
166 template< typename VT1 // Type of the left-hand side dense vector
167  , bool TF1 // Transpose flag of the left-hand side dense vector
168  , typename VT2 // Type of the right-hand side sparse vector
169  , bool TF2 > // Transpose flag of the right-hand side sparse vector
170 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
171 {
173 
174  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
175 
176  const size_t threads ( TheThreadBackend::size() );
177  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
178  const size_t sizePerThread( (~lhs).size() / threads + addon );
179 
180  for( size_t i=0UL; i<threads; ++i )
181  {
182  const size_t index( i*sizePerThread );
183 
184  if( index >= (~lhs).size() )
185  continue;
186 
187  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
188  auto target( subvector<unaligned>( ~lhs, index, size ) );
189  TheThreadBackend::scheduleAssign( target, subvector<unaligned>( ~rhs, index, size ) );
190  }
191 
192  TheThreadBackend::wait();
193 }
195 //*************************************************************************************************
196 
197 
198 //*************************************************************************************************
216 template< typename VT1 // Type of the left-hand side dense vector
217  , bool TF1 // Transpose flag of the left-hand side dense vector
218  , typename VT2 // Type of the right-hand side vector
219  , bool TF2 > // Transpose flag of the right-hand side vector
220 inline EnableIf_< And< IsDenseVector<VT1>
221  , Or< Not< IsSMPAssignable<VT1> >
222  , Not< IsSMPAssignable<VT2> > > > >
223  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
224 {
226 
227  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
228 
229  assign( ~lhs, ~rhs );
230 }
232 //*************************************************************************************************
233 
234 
235 //*************************************************************************************************
253 template< typename VT1 // Type of the left-hand side dense vector
254  , bool TF1 // Transpose flag of the left-hand side dense vector
255  , typename VT2 // Type of the right-hand side vector
256  , bool TF2 > // Transpose flag of the right-hand side vector
257 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
258  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
259 {
261 
262  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
263  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
264 
265  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
266 
268  {
269  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
270  assign( ~lhs, ~rhs );
271  }
272  else {
273  smpAssign_backend( ~lhs, ~rhs );
274  }
275  }
276 }
278 //*************************************************************************************************
279 
280 
281 
282 
283 //=================================================================================================
284 //
285 // ADDITION ASSIGNMENT
286 //
287 //=================================================================================================
288 
289 //*************************************************************************************************
306 template< typename VT1 // Type of the left-hand side dense vector
307  , bool TF1 // Transpose flag of the left-hand side dense vector
308  , typename VT2 // Type of the right-hand side dense vector
309  , bool TF2 > // Transpose flag of the right-hand side dense vector
310 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
311 {
313 
314  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
315 
316  using ET1 = ElementType_<VT1>;
317  using ET2 = ElementType_<VT2>;
318 
319  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
320  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
321 
322  const bool lhsAligned( (~lhs).isAligned() );
323  const bool rhsAligned( (~rhs).isAligned() );
324 
325  const size_t threads ( TheThreadBackend::size() );
326  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
327  const size_t equalShare ( (~lhs).size() / threads + addon );
328  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
329  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
330 
331  for( size_t i=0UL; i<threads; ++i )
332  {
333  const size_t index( i*sizePerThread );
334 
335  if( index >= (~lhs).size() )
336  continue;
337 
338  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
339 
340  if( simdEnabled && lhsAligned && rhsAligned ) {
341  auto target( subvector<aligned>( ~lhs, index, size ) );
342  TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
343  }
344  else if( simdEnabled && lhsAligned ) {
345  auto target( subvector<aligned>( ~lhs, index, size ) );
346  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
347  }
348  else if( simdEnabled && rhsAligned ) {
349  auto target( subvector<unaligned>( ~lhs, index, size ) );
350  TheThreadBackend::scheduleAddAssign( target, subvector<aligned>( ~rhs, index, size ) );
351  }
352  else {
353  auto target( subvector<unaligned>( ~lhs, index, size ) );
354  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
355  }
356  }
357 
358  TheThreadBackend::wait();
359 }
361 //*************************************************************************************************
362 
363 
364 //*************************************************************************************************
381 template< typename VT1 // Type of the left-hand side dense vector
382  , bool TF1 // Transpose flag of the left-hand side dense vector
383  , typename VT2 // Type of the right-hand side sparse vector
384  , bool TF2 > // Transpose flag of the right-hand side sparse vector
385 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
386 {
388 
389  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
390 
391  const size_t threads ( TheThreadBackend::size() );
392  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
393  const size_t sizePerThread( (~lhs).size() / threads + addon );
394 
395  for( size_t i=0UL; i<threads; ++i )
396  {
397  const size_t index( i*sizePerThread );
398 
399  if( index >= (~lhs).size() )
400  continue;
401 
402  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
403  auto target( subvector<unaligned>( ~lhs, index, size ) );
404  TheThreadBackend::scheduleAddAssign( target, subvector<unaligned>( ~rhs, index, size ) );
405  }
406 
407  TheThreadBackend::wait();
408 }
410 //*************************************************************************************************
411 
412 
413 //*************************************************************************************************
432 template< typename VT1 // Type of the left-hand side dense vector
433  , bool TF1 // Transpose flag of the left-hand side dense vector
434  , typename VT2 // Type of the right-hand side vector
435  , bool TF2 > // Transpose flag of the right-hand side vector
436 inline EnableIf_< And< IsDenseVector<VT1>
437  , Or< Not< IsSMPAssignable<VT1> >
438  , Not< IsSMPAssignable<VT2> > > > >
439  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
440 {
442 
443  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
444 
445  addAssign( ~lhs, ~rhs );
446 }
448 //*************************************************************************************************
449 
450 
451 //*************************************************************************************************
469 template< typename VT1 // Type of the left-hand side dense vector
470  , bool TF1 // Transpose flag of the left-hand side dense vector
471  , typename VT2 // Type of the right-hand side vector
472  , bool TF2 > // Transpose flag of the right-hand side vector
473 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
474  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
475 {
477 
478  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
479  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
480 
481  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
482 
484  {
485  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
486  addAssign( ~lhs, ~rhs );
487  }
488  else {
489  smpAddAssign_backend( ~lhs, ~rhs );
490  }
491  }
492 }
494 //*************************************************************************************************
495 
496 
497 
498 
499 //=================================================================================================
500 //
501 // SUBTRACTION ASSIGNMENT
502 //
503 //=================================================================================================
504 
505 //*************************************************************************************************
522 template< typename VT1 // Type of the left-hand side dense vector
523  , bool TF1 // Transpose flag of the left-hand side dense vector
524  , typename VT2 // Type of the right-hand side dense vector
525  , bool TF2 > // Transpose flag of the right-hand side dense vector
526 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
527 {
529 
530  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
531 
532  using ET1 = ElementType_<VT1>;
533  using ET2 = ElementType_<VT2>;
534 
535  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
536  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
537 
538  const bool lhsAligned( (~lhs).isAligned() );
539  const bool rhsAligned( (~rhs).isAligned() );
540 
541  const size_t threads ( TheThreadBackend::size() );
542  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
543  const size_t equalShare ( (~lhs).size() / threads + addon );
544  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
545  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
546 
547  for( size_t i=0UL; i<threads; ++i )
548  {
549  const size_t index( i*sizePerThread );
550 
551  if( index >= (~lhs).size() )
552  continue;
553 
554  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
555 
556  if( simdEnabled && lhsAligned && rhsAligned ) {
557  auto target( subvector<aligned>( ~lhs, index, size ) );
558  TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
559  }
560  else if( simdEnabled && lhsAligned ) {
561  auto target( subvector<aligned>( ~lhs, index, size ) );
562  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
563  }
564  else if( simdEnabled && rhsAligned ) {
565  auto target( subvector<unaligned>( ~lhs, index, size ) );
566  TheThreadBackend::scheduleSubAssign( target, subvector<aligned>( ~rhs, index, size ) );
567  }
568  else {
569  auto target( subvector<unaligned>( ~lhs, index, size ) );
570  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
571  }
572  }
573 
574  TheThreadBackend::wait();
575 }
577 //*************************************************************************************************
578 
579 
580 //*************************************************************************************************
597 template< typename VT1 // Type of the left-hand side dense vector
598  , bool TF1 // Transpose flag of the left-hand side dense vector
599  , typename VT2 // Type of the right-hand side sparse vector
600  , bool TF2 > // Transpose flag of the right-hand side sparse vector
601 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
602 {
604 
605  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
606 
607  const size_t threads ( TheThreadBackend::size() );
608  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
609  const size_t sizePerThread( (~lhs).size() / threads + addon );
610 
611  for( size_t i=0UL; i<threads; ++i )
612  {
613  const size_t index( i*sizePerThread );
614 
615  if( index >= (~lhs).size() )
616  continue;
617 
618  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
619  auto target( subvector<unaligned>( ~lhs, index, size ) );
620  TheThreadBackend::scheduleSubAssign( target, subvector<unaligned>( ~rhs, index, size ) );
621  }
622 
623  TheThreadBackend::wait();
624 }
626 //*************************************************************************************************
627 
628 
629 //*************************************************************************************************
648 template< typename VT1 // Type of the left-hand side dense vector
649  , bool TF1 // Transpose flag of the left-hand side dense vector
650  , typename VT2 // Type of the right-hand side vector
651  , bool TF2 > // Transpose flag of the right-hand side vector
652 inline EnableIf_< And< IsDenseVector<VT1>
653  , Or< Not< IsSMPAssignable<VT1> >
654  , Not< IsSMPAssignable<VT2> > > > >
655  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
656 {
658 
659  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
660 
661  subAssign( ~lhs, ~rhs );
662 }
664 //*************************************************************************************************
665 
666 
667 //*************************************************************************************************
686 template< typename VT1 // Type of the left-hand side dense vector
687  , bool TF1 // Transpose flag of the left-hand side dense vector
688  , typename VT2 // Type of the right-hand side vector
689  , bool TF2 > // Transpose flag of the right-hand side vector
690 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
691  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
692 {
694 
695  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
696  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
697 
698  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
699 
701  {
702  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
703  subAssign( ~lhs, ~rhs );
704  }
705  else {
706  smpSubAssign_backend( ~lhs, ~rhs );
707  }
708  }
709 }
711 //*************************************************************************************************
712 
713 
714 
715 
716 //=================================================================================================
717 //
718 // MULTIPLICATION ASSIGNMENT
719 //
720 //=================================================================================================
721 
722 //*************************************************************************************************
739 template< typename VT1 // Type of the left-hand side dense vector
740  , bool TF1 // Transpose flag of the left-hand side dense vector
741  , typename VT2 // Type of the right-hand side dense vector
742  , bool TF2 > // Transpose flag of the right-hand side dense vector
743 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
744 {
746 
747  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
748 
749  using ET1 = ElementType_<VT1>;
750  using ET2 = ElementType_<VT2>;
751 
752  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
753  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
754 
755  const bool lhsAligned( (~lhs).isAligned() );
756  const bool rhsAligned( (~rhs).isAligned() );
757 
758  const size_t threads ( TheThreadBackend::size() );
759  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
760  const size_t equalShare ( (~lhs).size() / threads + addon );
761  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
762  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
763 
764  for( size_t i=0UL; i<threads; ++i )
765  {
766  const size_t index( i*sizePerThread );
767 
768  if( index >= (~lhs).size() )
769  continue;
770 
771  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
772 
773  if( simdEnabled && lhsAligned && rhsAligned ) {
774  auto target( subvector<aligned>( ~lhs, index, size ) );
775  TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
776  }
777  else if( simdEnabled && lhsAligned ) {
778  auto target( subvector<aligned>( ~lhs, index, size ) );
779  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
780  }
781  else if( simdEnabled && rhsAligned ) {
782  auto target( subvector<unaligned>( ~lhs, index, size ) );
783  TheThreadBackend::scheduleMultAssign( target, subvector<aligned>( ~rhs, index, size ) );
784  }
785  else {
786  auto target( subvector<unaligned>( ~lhs, index, size ) );
787  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
788  }
789  }
790 
791  TheThreadBackend::wait();
792 }
794 //*************************************************************************************************
795 
796 
797 //*************************************************************************************************
814 template< typename VT1 // Type of the left-hand side dense vector
815  , bool TF1 // Transpose flag of the left-hand side dense vector
816  , typename VT2 // Type of the right-hand side sparse vector
817  , bool TF2 > // Transpose flag of the right-hand side sparse vector
818 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
819 {
821 
822  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
823 
824  const size_t threads ( TheThreadBackend::size() );
825  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
826  const size_t sizePerThread( (~lhs).size() / threads + addon );
827 
828  for( size_t i=0UL; i<threads; ++i )
829  {
830  const size_t index( i*sizePerThread );
831 
832  if( index >= (~lhs).size() )
833  continue;
834 
835  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
836  auto target( subvector<unaligned>( ~lhs, index, size ) );
837  TheThreadBackend::scheduleMultAssign( target, subvector<unaligned>( ~rhs, index, size ) );
838  }
839 
840  TheThreadBackend::wait();
841 }
843 //*************************************************************************************************
844 
845 
846 //*************************************************************************************************
865 template< typename VT1 // Type of the left-hand side dense vector
866  , bool TF1 // Transpose flag of the left-hand side dense vector
867  , typename VT2 // Type of the right-hand side vector
868  , bool TF2 > // Transpose flag of the right-hand side vector
869 inline EnableIf_< And< IsDenseVector<VT1>
870  , Or< Not< IsSMPAssignable<VT1> >
871  , Not< IsSMPAssignable<VT2> > > > >
872  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
873 {
875 
876  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
877 
878  multAssign( ~lhs, ~rhs );
879 }
881 //*************************************************************************************************
882 
883 
884 //*************************************************************************************************
903 template< typename VT1 // Type of the left-hand side dense vector
904  , bool TF1 // Transpose flag of the left-hand side dense vector
905  , typename VT2 // Type of the right-hand side vector
906  , bool TF2 > // Transpose flag of the right-hand side vector
907 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
908  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
909 {
911 
912  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
913  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
914 
915  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
916 
918  {
919  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
920  multAssign( ~lhs, ~rhs );
921  }
922  else {
923  smpMultAssign_backend( ~lhs, ~rhs );
924  }
925  }
926 }
928 //*************************************************************************************************
929 
930 
931 
932 
933 //=================================================================================================
934 //
935 // DIVISION ASSIGNMENT
936 //
937 //=================================================================================================
938 
939 //*************************************************************************************************
956 template< typename VT1 // Type of the left-hand side dense vector
957  , bool TF1 // Transpose flag of the left-hand side dense vector
958  , typename VT2 // Type of the right-hand side dense vector
959  , bool TF2 > // Transpose flag of the right-hand side dense vector
960 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
961 {
963 
964  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
965 
966  using ET1 = ElementType_<VT1>;
967  using ET2 = ElementType_<VT2>;
968 
969  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
970  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
971 
972  const bool lhsAligned( (~lhs).isAligned() );
973  const bool rhsAligned( (~rhs).isAligned() );
974 
975  const size_t threads ( TheThreadBackend::size() );
976  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
977  const size_t equalShare ( (~lhs).size() / threads + addon );
978  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
979  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
980 
981  for( size_t i=0UL; i<threads; ++i )
982  {
983  const size_t index( i*sizePerThread );
984 
985  if( index >= (~lhs).size() )
986  continue;
987 
988  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
989 
990  if( simdEnabled && lhsAligned && rhsAligned ) {
991  auto target( subvector<aligned>( ~lhs, index, size ) );
992  TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
993  }
994  else if( simdEnabled && lhsAligned ) {
995  auto target( subvector<aligned>( ~lhs, index, size ) );
996  TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
997  }
998  else if( simdEnabled && rhsAligned ) {
999  auto target( subvector<unaligned>( ~lhs, index, size ) );
1000  TheThreadBackend::scheduleDivAssign( target, subvector<aligned>( ~rhs, index, size ) );
1001  }
1002  else {
1003  auto target( subvector<unaligned>( ~lhs, index, size ) );
1004  TheThreadBackend::scheduleDivAssign( target, subvector<unaligned>( ~rhs, index, size ) );
1005  }
1006  }
1007 
1008  TheThreadBackend::wait();
1009 }
1011 //*************************************************************************************************
1012 
1013 
1014 //*************************************************************************************************
1033 template< typename VT1 // Type of the left-hand side dense vector
1034  , bool TF1 // Transpose flag of the left-hand side dense vector
1035  , typename VT2 // Type of the right-hand side vector
1036  , bool TF2 > // Transpose flag of the right-hand side vector
1037 inline EnableIf_< And< IsDenseVector<VT1>
1038  , Or< Not< IsSMPAssignable<VT1> >
1039  , Not< IsSMPAssignable<VT2> > > > >
1040  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1041 {
1043 
1044  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1045 
1046  divAssign( ~lhs, ~rhs );
1047 }
1049 //*************************************************************************************************
1050 
1051 
1052 //*************************************************************************************************
1070 template< typename VT1 // Type of the left-hand side dense vector
1071  , bool TF1 // Transpose flag of the left-hand side dense vector
1072  , typename VT2 // Type of the right-hand side vector
1073  , bool TF2 > // Transpose flag of the right-hand side vector
1074 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1075  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1076 {
1078 
1079  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
1080  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
1081 
1082  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1083 
1085  {
1086  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1087  divAssign( ~lhs, ~rhs );
1088  }
1089  else {
1090  smpDivAssign_backend( ~lhs, ~rhs );
1091  }
1092  }
1093 }
1095 //*************************************************************************************************
1096 
1097 
1098 
1099 
1100 //=================================================================================================
1101 //
1102 // COMPILE TIME CONSTRAINTS
1103 //
1104 //=================================================================================================
1105 
1106 //*************************************************************************************************
1108 namespace {
1109 
1111 
1112 }
1114 //*************************************************************************************************
1115 
1116 } // namespace blaze
1117 
1118 #endif
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
#define BLAZE_BOOST_THREADS_PARALLEL_MODE
Compilation switch for the Boost parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:122
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
#define BLAZE_CPP_THREADS_PARALLEL_MODE
Compilation switch for the C++11 parallelization. This compilation switch enables/disables the paralle...
Definition: SMP.h:95
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
Header file for the C++11 and Boost thread backend.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.