DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
44 #include <blaze/math/Aliases.h>
55 #include <blaze/system/SMP.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/mpl/And.h>
61 #include <blaze/util/mpl/Not.h>
62 #include <blaze/util/mpl/Or.h>
64 #include <blaze/util/Types.h>
65 
66 
67 namespace blaze {
68 
69 //=================================================================================================
70 //
71 // PLAIN ASSIGNMENT
72 //
73 //=================================================================================================
74 
75 //*************************************************************************************************
91 template< typename VT1 // Type of the left-hand side dense vector
92  , bool TF1 // Transpose flag of the left-hand side dense vector
93  , typename VT2 // Type of the right-hand side dense vector
94  , bool TF2 > // Transpose flag of the right-hand side dense vector
95 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
96 {
98 
99  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
100 
101  using ET1 = ElementType_<VT1>;
102  using ET2 = ElementType_<VT2>;
103 
104  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
105  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
106 
107  const bool lhsAligned( (~lhs).isAligned() );
108  const bool rhsAligned( (~rhs).isAligned() );
109 
110  const int threads ( omp_get_num_threads() );
111  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
112  const size_t equalShare ( (~lhs).size() / threads + addon );
113  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
114  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
115 
116 #pragma omp for schedule(dynamic,1) nowait
117  for( int i=0UL; i<threads; ++i )
118  {
119  const size_t index( i*sizePerThread );
120 
121  if( index >= (~lhs).size() )
122  continue;
123 
124  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
125 
126  if( simdEnabled && lhsAligned && rhsAligned ) {
127  auto target( subvector<aligned>( ~lhs, index, size ) );
128  assign( target, subvector<aligned>( ~rhs, index, size ) );
129  }
130  else if( simdEnabled && lhsAligned ) {
131  auto target( subvector<aligned>( ~lhs, index, size ) );
132  assign( target, subvector<unaligned>( ~rhs, index, size ) );
133  }
134  else if( simdEnabled && rhsAligned ) {
135  auto target( subvector<unaligned>( ~lhs, index, size ) );
136  assign( target, subvector<aligned>( ~rhs, index, size ) );
137  }
138  else {
139  auto target( subvector<unaligned>( ~lhs, index, size ) );
140  assign( target, subvector<unaligned>( ~rhs, index, size ) );
141  }
142  }
143 }
145 //*************************************************************************************************
146 
147 
148 //*************************************************************************************************
164 template< typename VT1 // Type of the left-hand side dense vector
165  , bool TF1 // Transpose flag of the left-hand side dense vector
166  , typename VT2 // Type of the right-hand side sparse vector
167  , bool TF2 > // Transpose flag of the right-hand side sparse vector
168 void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
169 {
171 
172  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
173 
174  const int threads ( omp_get_num_threads() );
175  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
176  const size_t sizePerThread( (~lhs).size() / threads + addon );
177 
178 #pragma omp for schedule(dynamic,1) nowait
179  for( int i=0UL; i<threads; ++i )
180  {
181  const size_t index( i*sizePerThread );
182 
183  if( index >= (~lhs).size() )
184  continue;
185 
186  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
187  auto target( subvector<unaligned>( ~lhs, index, size ) );
188  assign( target, subvector<unaligned>( ~rhs, index, size ) );
189  }
190 }
192 //*************************************************************************************************
193 
194 
195 //*************************************************************************************************
213 template< typename VT1 // Type of the left-hand side dense vector
214  , bool TF1 // Transpose flag of the left-hand side dense vector
215  , typename VT2 // Type of the right-hand side vector
216  , bool TF2 > // Transpose flag of the right-hand side vector
217 inline EnableIf_< And< IsDenseVector<VT1>
218  , Or< Not< IsSMPAssignable<VT1> >
219  , Not< IsSMPAssignable<VT2> > > > >
220  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
221 {
223 
224  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
225 
226  assign( ~lhs, ~rhs );
227 }
229 //*************************************************************************************************
230 
231 
232 //*************************************************************************************************
250 template< typename VT1 // Type of the left-hand side dense vector
251  , bool TF1 // Transpose flag of the left-hand side dense vector
252  , typename VT2 // Type of the right-hand side vector
253  , bool TF2 > // Transpose flag of the right-hand side vector
254 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
255  smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
256 {
258 
259  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
260  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
261 
262  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
263 
265  {
266  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
267  assign( ~lhs, ~rhs );
268  }
269  else {
270 #pragma omp parallel shared( lhs, rhs )
271  smpAssign_backend( ~lhs, ~rhs );
272  }
273  }
274 }
276 //*************************************************************************************************
277 
278 
279 
280 
281 //=================================================================================================
282 //
283 // ADDITION ASSIGNMENT
284 //
285 //=================================================================================================
286 
287 //*************************************************************************************************
303 template< typename VT1 // Type of the left-hand side dense vector
304  , bool TF1 // Transpose flag of the left-hand side dense vector
305  , typename VT2 // Type of the right-hand side dense vector
306  , bool TF2 > // Transpose flag of the right-hand side dense vector
307 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
308 {
310 
311  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
312 
313  using ET1 = ElementType_<VT1>;
314  using ET2 = ElementType_<VT2>;
315 
316  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
317  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
318 
319  const bool lhsAligned( (~lhs).isAligned() );
320  const bool rhsAligned( (~rhs).isAligned() );
321 
322  const int threads ( omp_get_num_threads() );
323  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
324  const size_t equalShare ( (~lhs).size() / threads + addon );
325  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
326  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
327 
328 #pragma omp for schedule(dynamic,1) nowait
329  for( int i=0UL; i<threads; ++i )
330  {
331  const size_t index( i*sizePerThread );
332 
333  if( index >= (~lhs).size() )
334  continue;
335 
336  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
337 
338  if( simdEnabled && lhsAligned && rhsAligned ) {
339  auto target( subvector<aligned>( ~lhs, index, size ) );
340  addAssign( target, subvector<aligned>( ~rhs, index, size ) );
341  }
342  else if( simdEnabled && lhsAligned ) {
343  auto target( subvector<aligned>( ~lhs, index, size ) );
344  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
345  }
346  else if( simdEnabled && rhsAligned ) {
347  auto target( subvector<unaligned>( ~lhs, index, size ) );
348  addAssign( target, subvector<aligned>( ~rhs, index, size ) );
349  }
350  else {
351  auto target( subvector<unaligned>( ~lhs, index, size ) );
352  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
353  }
354  }
355 }
357 //*************************************************************************************************
358 
359 
360 //*************************************************************************************************
376 template< typename VT1 // Type of the left-hand side dense vector
377  , bool TF1 // Transpose flag of the left-hand side dense vector
378  , typename VT2 // Type of the right-hand side sparse vector
379  , bool TF2 > // Transpose flag of the right-hand side sparse vector
380 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
381 {
383 
384  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
385 
386  const int threads ( omp_get_num_threads() );
387  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
388  const size_t sizePerThread( (~lhs).size() / threads + addon );
389 
390 #pragma omp for schedule(dynamic,1) nowait
391  for( int i=0UL; i<threads; ++i )
392  {
393  const size_t index( i*sizePerThread );
394 
395  if( index >= (~lhs).size() )
396  continue;
397 
398  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
399  auto target( subvector<unaligned>( ~lhs, index, size ) );
400  addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
401  }
402 }
404 //*************************************************************************************************
405 
406 
407 //*************************************************************************************************
425 template< typename VT1 // Type of the left-hand side dense vector
426  , bool TF1 // Transpose flag of the left-hand side dense vector
427  , typename VT2 // Type of the right-hand side vector
428  , bool TF2 > // Transpose flag of the right-hand side vector
429 inline EnableIf_< And< IsDenseVector<VT1>
430  , Or< Not< IsSMPAssignable<VT1> >
431  , Not< IsSMPAssignable<VT2> > > > >
432  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
433 {
435 
436  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
437 
438  addAssign( ~lhs, ~rhs );
439 }
441 //*************************************************************************************************
442 
443 
444 //*************************************************************************************************
462 template< typename VT1 // Type of the left-hand side dense vector
463  , bool TF1 // Transpose flag of the left-hand side dense vector
464  , typename VT2 // Type of the right-hand side vector
465  , bool TF2 > // Transpose flag of the right-hand side vector
466 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
467  smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
468 {
470 
471  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
472  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
473 
474  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
475 
477  {
478  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
479  addAssign( ~lhs, ~rhs );
480  }
481  else {
482 #pragma omp parallel shared( lhs, rhs )
483  smpAddAssign_backend( ~lhs, ~rhs );
484  }
485  }
486 }
488 //*************************************************************************************************
489 
490 
491 
492 
493 //=================================================================================================
494 //
495 // SUBTRACTION ASSIGNMENT
496 //
497 //=================================================================================================
498 
499 //*************************************************************************************************
515 template< typename VT1 // Type of the left-hand side dense vector
516  , bool TF1 // Transpose flag of the left-hand side dense vector
517  , typename VT2 // Type of the right-hand side dense vector
518  , bool TF2 > // Transpose flag of the right-hand side dense vector
519 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
520 {
522 
523  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
524 
525  using ET1 = ElementType_<VT1>;
526  using ET2 = ElementType_<VT2>;
527 
528  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
529  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
530 
531  const bool lhsAligned( (~lhs).isAligned() );
532  const bool rhsAligned( (~rhs).isAligned() );
533 
534  const int threads ( omp_get_num_threads() );
535  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
536  const size_t equalShare ( (~lhs).size() / threads + addon );
537  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
538  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
539 
540 #pragma omp for schedule(dynamic,1) nowait
541  for( int i=0UL; i<threads; ++i )
542  {
543  const size_t index( i*sizePerThread );
544 
545  if( index >= (~lhs).size() )
546  continue;
547 
548  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
549 
550  if( simdEnabled && lhsAligned && rhsAligned ) {
551  auto target( subvector<aligned>( ~lhs, index, size ) );
552  subAssign( target, subvector<aligned>( ~rhs, index, size ) );
553  }
554  else if( simdEnabled && lhsAligned ) {
555  auto target( subvector<aligned>( ~lhs, index, size ) );
556  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
557  }
558  else if( simdEnabled && rhsAligned ) {
559  auto target( subvector<unaligned>( ~lhs, index, size ) );
560  subAssign( target, subvector<aligned>( ~rhs, index, size ) );
561  }
562  else {
563  auto target( subvector<unaligned>( ~lhs, index, size ) );
564  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
565  }
566  }
567 }
569 //*************************************************************************************************
570 
571 
572 //*************************************************************************************************
588 template< typename VT1 // Type of the left-hand side dense vector
589  , bool TF1 // Transpose flag of the left-hand side dense vector
590  , typename VT2 // Type of the right-hand side sparse vector
591  , bool TF2 > // Transpose flag of the right-hand side sparse vector
592 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
593 {
595 
596  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
597 
598  const int threads ( omp_get_num_threads() );
599  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
600  const size_t sizePerThread( (~lhs).size() / threads + addon );
601 
602 #pragma omp for schedule(dynamic,1) nowait
603  for( int i=0UL; i<threads; ++i )
604  {
605  const size_t index( i*sizePerThread );
606 
607  if( index >= (~lhs).size() )
608  continue;
609 
610  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
611  auto target( subvector<unaligned>( ~lhs, index, size ) );
612  subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
613  }
614 }
616 //*************************************************************************************************
617 
618 
619 //*************************************************************************************************
637 template< typename VT1 // Type of the left-hand side dense vector
638  , bool TF1 // Transpose flag of the left-hand side dense vector
639  , typename VT2 // Type of the right-hand side vector
640  , bool TF2 > // Transpose flag of the right-hand side vector
641 inline EnableIf_< And< IsDenseVector<VT1>
642  , Or< Not< IsSMPAssignable<VT1> >
643  , Not< IsSMPAssignable<VT2> > > > >
644  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
645 {
647 
648  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
649 
650  subAssign( ~lhs, ~rhs );
651 }
653 //*************************************************************************************************
654 
655 
656 //*************************************************************************************************
674 template< typename VT1 // Type of the left-hand side dense vector
675  , bool TF1 // Transpose flag of the left-hand side dense vector
676  , typename VT2 // Type of the right-hand side vector
677  , bool TF2 > // Transpose flag of the right-hand side vector
678 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
679  smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
680 {
682 
683  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
684  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
685 
686  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
687 
689  {
690  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
691  subAssign( ~lhs, ~rhs );
692  }
693  else {
694 #pragma omp parallel shared( lhs, rhs )
695  smpSubAssign_backend( ~lhs, ~rhs );
696  }
697  }
698 }
700 //*************************************************************************************************
701 
702 
703 
704 
705 //=================================================================================================
706 //
707 // MULTIPLICATION ASSIGNMENT
708 //
709 //=================================================================================================
710 
711 //*************************************************************************************************
728 template< typename VT1 // Type of the left-hand side dense vector
729  , bool TF1 // Transpose flag of the left-hand side dense vector
730  , typename VT2 // Type of the right-hand side dense vector
731  , bool TF2 > // Transpose flag of the right-hand side dense vector
732 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
733 {
735 
736  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
737 
738  using ET1 = ElementType_<VT1>;
739  using ET2 = ElementType_<VT2>;
740 
741  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
742  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
743 
744  const bool lhsAligned( (~lhs).isAligned() );
745  const bool rhsAligned( (~rhs).isAligned() );
746 
747  const int threads ( omp_get_num_threads() );
748  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
749  const size_t equalShare ( (~lhs).size() / threads + addon );
750  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
751  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
752 
753 #pragma omp for schedule(dynamic,1) nowait
754  for( int i=0UL; i<threads; ++i )
755  {
756  const size_t index( i*sizePerThread );
757 
758  if( index >= (~lhs).size() )
759  continue;
760 
761  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
762 
763  if( simdEnabled && lhsAligned && rhsAligned ) {
764  auto target( subvector<aligned>( ~lhs, index, size ) );
765  multAssign( target, subvector<aligned>( ~rhs, index, size ) );
766  }
767  else if( simdEnabled && lhsAligned ) {
768  auto target( subvector<aligned>( ~lhs, index, size ) );
769  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
770  }
771  else if( simdEnabled && rhsAligned ) {
772  auto target( subvector<unaligned>( ~lhs, index, size ) );
773  multAssign( target, subvector<aligned>( ~rhs, index, size ) );
774  }
775  else {
776  auto target( subvector<unaligned>( ~lhs, index, size ) );
777  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
778  }
779  }
780 }
782 //*************************************************************************************************
783 
784 
785 //*************************************************************************************************
802 template< typename VT1 // Type of the left-hand side dense vector
803  , bool TF1 // Transpose flag of the left-hand side dense vector
804  , typename VT2 // Type of the right-hand side sparse vector
805  , bool TF2 > // Transpose flag of the right-hand side sparse vector
806 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
807 {
809 
810  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
811 
812  const int threads ( omp_get_num_threads() );
813  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
814  const size_t sizePerThread( (~lhs).size() / threads + addon );
815 
816 #pragma omp for schedule(dynamic,1) nowait
817  for( int i=0UL; i<threads; ++i )
818  {
819  const size_t index( i*sizePerThread );
820 
821  if( index >= (~lhs).size() )
822  continue;
823 
824  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
825  auto target( subvector<unaligned>( ~lhs, index, size ) );
826  multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
827  }
828 }
830 //*************************************************************************************************
831 
832 
833 //*************************************************************************************************
851 template< typename VT1 // Type of the left-hand side dense vector
852  , bool TF1 // Transpose flag of the left-hand side dense vector
853  , typename VT2 // Type of the right-hand side vector
854  , bool TF2 > // Transpose flag of the right-hand side vector
855 inline EnableIf_< And< IsDenseVector<VT1>
856  , Or< Not< IsSMPAssignable<VT1> >
857  , Not< IsSMPAssignable<VT2> > > > >
858  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
859 {
861 
862  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
863 
864  multAssign( ~lhs, ~rhs );
865 }
867 //*************************************************************************************************
868 
869 
870 //*************************************************************************************************
888 template< typename VT1 // Type of the left-hand side dense vector
889  , bool TF1 // Transpose flag of the left-hand side dense vector
890  , typename VT2 // Type of the right-hand side vector
891  , bool TF2 > // Transpose flag of the right-hand side vector
892 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
893  smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
894 {
896 
897  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
898  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
899 
900  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
901 
903  {
904  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
905  multAssign( ~lhs, ~rhs );
906  }
907  else {
908 #pragma omp parallel shared( lhs, rhs )
909  smpMultAssign_backend( ~lhs, ~rhs );
910  }
911  }
912 }
914 //*************************************************************************************************
915 
916 
917 
918 
919 //=================================================================================================
920 //
921 // DIVISION ASSIGNMENT
922 //
923 //=================================================================================================
924 
925 //*************************************************************************************************
941 template< typename VT1 // Type of the left-hand side dense vector
942  , bool TF1 // Transpose flag of the left-hand side dense vector
943  , typename VT2 // Type of the right-hand side dense vector
944  , bool TF2 > // Transpose flag of the right-hand side dense vector
945 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
946 {
948 
949  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
950 
951  using ET1 = ElementType_<VT1>;
952  using ET2 = ElementType_<VT2>;
953 
954  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
955  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );
956 
957  const bool lhsAligned( (~lhs).isAligned() );
958  const bool rhsAligned( (~rhs).isAligned() );
959 
960  const int threads ( omp_get_num_threads() );
961  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
962  const size_t equalShare ( (~lhs).size() / threads + addon );
963  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
964  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
965 
966 #pragma omp for schedule(dynamic,1) nowait
967  for( int i=0UL; i<threads; ++i )
968  {
969  const size_t index( i*sizePerThread );
970 
971  if( index >= (~lhs).size() )
972  continue;
973 
974  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
975 
976  if( simdEnabled && lhsAligned && rhsAligned ) {
977  auto target( subvector<aligned>( ~lhs, index, size ) );
978  divAssign( target, subvector<aligned>( ~rhs, index, size ) );
979  }
980  else if( simdEnabled && lhsAligned ) {
981  auto target( subvector<aligned>( ~lhs, index, size ) );
982  divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
983  }
984  else if( simdEnabled && rhsAligned ) {
985  auto target( subvector<unaligned>( ~lhs, index, size ) );
986  divAssign( target, subvector<aligned>( ~rhs, index, size ) );
987  }
988  else {
989  auto target( subvector<unaligned>( ~lhs, index, size ) );
990  divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
991  }
992  }
993 }
995 //*************************************************************************************************
996 
997 
998 //*************************************************************************************************
1016 template< typename VT1 // Type of the left-hand side dense vector
1017  , bool TF1 // Transpose flag of the left-hand side dense vector
1018  , typename VT2 // Type of the right-hand side vector
1019  , bool TF2 > // Transpose flag of the right-hand side vector
1020 inline EnableIf_< And< IsDenseVector<VT1>
1021  , Or< Not< IsSMPAssignable<VT1> >
1022  , Not< IsSMPAssignable<VT2> > > > >
1023  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1024 {
1026 
1027  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1028 
1029  divAssign( ~lhs, ~rhs );
1030 }
1032 //*************************************************************************************************
1033 
1034 
1035 //*************************************************************************************************
1053 template< typename VT1 // Type of the left-hand side dense vector
1054  , bool TF1 // Transpose flag of the left-hand side dense vector
1055  , typename VT2 // Type of the right-hand side vector
1056  , bool TF2 > // Transpose flag of the right-hand side vector
1057 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1058  smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
1059 {
1061 
1062  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT1> );
1063  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_<VT2> );
1064 
1065  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
1066 
1068  {
1069  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
1070  divAssign( ~lhs, ~rhs );
1071  }
1072  else {
1073 #pragma omp parallel shared( lhs, rhs )
1074  smpDivAssign_backend( ~lhs, ~rhs );
1075  }
1076  }
1077 }
1079 //*************************************************************************************************
1080 
1081 
1082 
1083 
1084 //=================================================================================================
1085 //
1086 // COMPILE TIME CONSTRAINTS
1087 //
1088 //=================================================================================================
1089 
1090 //*************************************************************************************************
// Anonymous namespace of the "COMPILE TIME CONSTRAINTS" section. It is empty in this listing.
// NOTE(review): presumably a compile-time check on BLAZE_OPENMP_PARALLEL_MODE (e.g. via
// BLAZE_STATIC_ASSERT) belongs here -- TODO confirm against the upstream header.
1092 namespace {
1093 

1095 
1096 }
1098 //*************************************************************************************************
1099 
1100 } // namespace blaze
1101 
1102 #endif
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the function trace functionality.