#ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
#define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_


//=================================================================================================
//
//  PLAIN ASSIGNMENT
//
//=================================================================================================

// Backend of the OpenMP-based SMP assignment of a dense vector to a dense vector. This function
// must only be called from within an active OpenMP parallel section.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side dense vector
        , bool TF2 >      // Transpose flag of the right-hand side dense vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   // SIMD kernels can only be used if both element types are SIMD-combinable
   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   // Split the vector into equally sized chunks; if SIMD is enabled, round each chunk up to
   // the next multiple of SIMDSIZE so that chunk boundaries remain SIMD-aligned
   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      // Dispatch on the run time alignment of the two operands
      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         assign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}

// Backend of the OpenMP-based SMP assignment of a sparse vector to a dense vector. Sparse
// vectors are assigned element-wise, so no SIMD-based chunk rounding is required and both
// subvectors are taken as unaligned.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side sparse vector
        , bool TF2 >      // Transpose flag of the right-hand side sparse vector
void smpAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      assign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}

// Default (serial) implementation of the SMP assignment of a vector to a dense vector. This
// overload is selected if at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   assign( ~lhs, ~rhs );
}

// OpenMP-based implementation of the SMP assignment of a vector to a dense vector. The parallel
// backend is used unless a serial section is active or the right-hand side operand cannot be
// assigned in parallel.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      assign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION
      {
         #pragma omp parallel shared( lhs, rhs )
         smpAssign_backend( ~lhs, ~rhs );
      }
   }
}
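
// Usage sketch (illustrative, not part of the original file): user code never calls smpAssign()
// directly; it is reached through the assignment operators of the vector classes. Assuming
// BLAZE_OPENMP_PARALLEL_MODE is enabled and both operands are SMP-assignable, an ordinary dense
// vector assignment is split across the OpenMP team, while wrapping the right-hand side in
// blaze::serial() forces the serial path:
//
//    blaze::DynamicVector<double> a( 10000UL ), b( 10000UL, 2.0 );
//
//    a = b;                       // parallel path: smpAssign_backend() on an OpenMP team
//    a = blaze::serial( b + b );  // serial path: isSerialSectionActive() selects assign()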


//=================================================================================================
//
//  ADDITION ASSIGNMENT
//
//=================================================================================================

// Backend of the OpenMP-based SMP addition assignment of a dense vector to a dense vector. This
// function must only be called from within an active OpenMP parallel section.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side dense vector
        , bool TF2 >      // Transpose flag of the right-hand side dense vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}

// Backend of the OpenMP-based SMP addition assignment of a sparse vector to a dense vector.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side sparse vector
        , bool TF2 >      // Transpose flag of the right-hand side sparse vector
void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      addAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}

// Default (serial) implementation of the SMP addition assignment of a vector to a dense vector.
// This overload is selected if at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   addAssign( ~lhs, ~rhs );
}

// OpenMP-based implementation of the SMP addition assignment of a vector to a dense vector.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      addAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION
      {
         #pragma omp parallel shared( lhs, rhs )
         smpAddAssign_backend( ~lhs, ~rhs );
      }
   }
}


//=================================================================================================
//
//  SUBTRACTION ASSIGNMENT
//
//=================================================================================================

// Backend of the OpenMP-based SMP subtraction assignment of a dense vector to a dense vector.
// This function must only be called from within an active OpenMP parallel section.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side dense vector
        , bool TF2 >      // Transpose flag of the right-hand side dense vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}

// Backend of the OpenMP-based SMP subtraction assignment of a sparse vector to a dense vector.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side sparse vector
        , bool TF2 >      // Transpose flag of the right-hand side sparse vector
void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      subAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}

// Default (serial) implementation of the SMP subtraction assignment of a vector to a dense
// vector. This overload is selected if at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   subAssign( ~lhs, ~rhs );
}

// OpenMP-based implementation of the SMP subtraction assignment of a vector to a dense vector.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      subAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION
      {
         #pragma omp parallel shared( lhs, rhs )
         smpSubAssign_backend( ~lhs, ~rhs );
      }
   }
}


//=================================================================================================
//
//  MULTIPLICATION ASSIGNMENT
//
//=================================================================================================

// Backend of the OpenMP-based SMP multiplication assignment of a dense vector to a dense vector.
// This function must only be called from within an active OpenMP parallel section.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side dense vector
        , bool TF2 >      // Transpose flag of the right-hand side dense vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}

// Backend of the OpenMP-based SMP multiplication assignment of a sparse vector to a dense vector.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side sparse vector
        , bool TF2 >      // Transpose flag of the right-hand side sparse vector
void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t sizePerThread( (~lhs).size() / threads + addon );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );
      auto target( subvector<unaligned>( ~lhs, index, size ) );
      multAssign( target, subvector<unaligned>( ~rhs, index, size ) );
   }
}

// Default (serial) implementation of the SMP multiplication assignment of a vector to a dense
// vector. This overload is selected if at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   multAssign( ~lhs, ~rhs );
}

// OpenMP-based implementation of the SMP multiplication assignment of a vector to a dense vector.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      multAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION
      {
         #pragma omp parallel shared( lhs, rhs )
         smpMultAssign_backend( ~lhs, ~rhs );
      }
   }
}


//=================================================================================================
//
//  DIVISION ASSIGNMENT
//
//=================================================================================================

// Backend of the OpenMP-based SMP division assignment of a dense vector to a dense vector.
// This function must only be called from within an active OpenMP parallel section.
template< typename VT1    // Type of the left-hand side dense vector
        , bool TF1        // Transpose flag of the left-hand side dense vector
        , typename VT2    // Type of the right-hand side dense vector
        , bool TF2 >      // Transpose flag of the right-hand side dense vector
void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs )
{
   BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );

   using ET1 = ElementType_<VT1>;
   using ET2 = ElementType_<VT2>;

   constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable<ET1,ET2>::value );
   constexpr size_t SIMDSIZE( SIMDTrait< ElementType_<VT1> >::size );

   const bool lhsAligned( (~lhs).isAligned() );
   const bool rhsAligned( (~rhs).isAligned() );

   const int    threads      ( omp_get_num_threads() );
   const size_t addon        ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
   const size_t equalShare   ( (~lhs).size() / threads + addon );
   const size_t rest         ( equalShare & ( SIMDSIZE - 1UL ) );
   const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );

   #pragma omp for schedule(dynamic,1) nowait
   for( int i=0; i<threads; ++i )
   {
      const size_t index( i*sizePerThread );

      if( index >= (~lhs).size() )
         continue;

      const size_t size( min( sizePerThread, (~lhs).size() - index ) );

      if( simdEnabled && lhsAligned && rhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         divAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && lhsAligned ) {
         auto target( subvector<aligned>( ~lhs, index, size ) );
         divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
      else if( simdEnabled && rhsAligned ) {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         divAssign( target, subvector<aligned>( ~rhs, index, size ) );
      }
      else {
         auto target( subvector<unaligned>( ~lhs, index, size ) );
         divAssign( target, subvector<unaligned>( ~rhs, index, size ) );
      }
   }
}

// Default (serial) implementation of the SMP division assignment of a vector to a dense vector.
// This overload is selected if at least one of the two operands is not SMP-assignable.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>
                     , Or< Not< IsSMPAssignable<VT1> >
                         , Not< IsSMPAssignable<VT2> > > > >
   smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   divAssign( ~lhs, ~rhs );
}

// OpenMP-based implementation of the SMP division assignment of a vector to a dense vector.
template< typename VT1, bool TF1, typename VT2, bool TF2 >
inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
   smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
{
   BLAZE_FUNCTION_TRACE;

   BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );

   if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
      divAssign( ~lhs, ~rhs );
   }
   else {
      BLAZE_PARALLEL_SECTION
      {
         #pragma omp parallel shared( lhs, rhs )
         smpDivAssign_backend( ~lhs, ~rhs );
      }
   }
}
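
// Example (a sketch under assumptions, not part of the original file): the five SMP operations
// defined above back the compound assignment operators of the dense vector classes, so with
// OpenMP parallelization enabled each of the following statements runs on a thread team:
//
//    blaze::DynamicVector<double> a( 100000UL, 1.0 ), b( 100000UL, 2.0 );
//
//    a  = b;  // smpAssign
//    a += b;  // smpAddAssign
//    a -= b;  // smpSubAssign
//    a *= b;  // smpMultAssign (componentwise)
//    a /= b;  // smpDivAssign  (componentwise)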