35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
92 template<
typename VT1
96 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
102 typedef ElementType_<VT1> ET1;
103 typedef ElementType_<VT2> ET2;
104 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
105 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
107 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
109 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
110 const bool lhsAligned ( (~lhs).isAligned() );
111 const bool rhsAligned ( (~rhs).isAligned() );
113 const int threads ( omp_get_num_threads() );
114 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
115 const size_t equalShare ( (~lhs).
size() / threads + addon );
116 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
117 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
119 #pragma omp for schedule(dynamic,1) nowait
120 for(
int i=0UL; i<threads; ++i )
122 const size_t index( i*sizePerThread );
124 if( index >= (~lhs).
size() )
127 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
129 if( simdEnabled && lhsAligned && rhsAligned ) {
130 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
131 assign( target, subvector<aligned>( ~rhs, index,
size ) );
133 else if( simdEnabled && lhsAligned ) {
134 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
135 assign( target, subvector<unaligned>( ~rhs, index,
size ) );
137 else if( simdEnabled && rhsAligned ) {
138 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
139 assign( target, subvector<aligned>( ~rhs, index,
size ) );
142 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
143 assign( target, subvector<unaligned>( ~rhs, index,
size ) );
167 template<
typename VT1
171 void smpAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
177 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
179 const int threads ( omp_get_num_threads() );
180 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
181 const size_t sizePerThread( (~lhs).
size() / threads + addon );
183 #pragma omp for schedule(dynamic,1) nowait
184 for(
int i=0UL; i<threads; ++i )
186 const size_t index( i*sizePerThread );
188 if( index >= (~lhs).
size() )
191 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
192 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
193 assign( target, subvector<unaligned>( ~rhs, index,
size ) );
218 template<
typename VT1
222 inline EnableIf_< And< IsDenseVector<VT1>
223 , Or< Not< IsSMPAssignable<VT1> >
224 , Not< IsSMPAssignable<VT2> > > > >
225 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
231 assign( ~lhs, ~rhs );
255 template<
typename VT1
259 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
260 smpAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
272 assign( ~lhs, ~rhs );
275 #pragma omp parallel shared( lhs, rhs )
276 smpAssign_backend( ~lhs, ~rhs );
308 template<
typename VT1
312 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
318 typedef ElementType_<VT1> ET1;
319 typedef ElementType_<VT2> ET2;
320 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
321 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
323 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
325 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
326 const bool lhsAligned ( (~lhs).isAligned() );
327 const bool rhsAligned ( (~rhs).isAligned() );
329 const int threads ( omp_get_num_threads() );
330 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
331 const size_t equalShare ( (~lhs).
size() / threads + addon );
332 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
333 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
335 #pragma omp for schedule(dynamic,1) nowait
336 for(
int i=0UL; i<threads; ++i )
338 const size_t index( i*sizePerThread );
340 if( index >= (~lhs).
size() )
343 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
345 if( simdEnabled && lhsAligned && rhsAligned ) {
346 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
347 addAssign( target, subvector<aligned>( ~rhs, index,
size ) );
349 else if( simdEnabled && lhsAligned ) {
350 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
351 addAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
353 else if( simdEnabled && rhsAligned ) {
354 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
355 addAssign( target, subvector<aligned>( ~rhs, index,
size ) );
358 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
359 addAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
383 template<
typename VT1
387 void smpAddAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
393 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
395 const int threads ( omp_get_num_threads() );
396 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
397 const size_t sizePerThread( (~lhs).
size() / threads + addon );
399 #pragma omp for schedule(dynamic,1) nowait
400 for(
int i=0UL; i<threads; ++i )
402 const size_t index( i*sizePerThread );
404 if( index >= (~lhs).
size() )
407 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
408 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
409 addAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
434 template<
typename VT1
438 inline EnableIf_< And< IsDenseVector<VT1>
439 , Or< Not< IsSMPAssignable<VT1> >
440 , Not< IsSMPAssignable<VT2> > > > >
441 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
447 addAssign( ~lhs, ~rhs );
471 template<
typename VT1
475 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
476 smpAddAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
488 addAssign( ~lhs, ~rhs );
491 #pragma omp parallel shared( lhs, rhs )
492 smpAddAssign_backend( ~lhs, ~rhs );
524 template<
typename VT1
528 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
534 typedef ElementType_<VT1> ET1;
535 typedef ElementType_<VT2> ET2;
536 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
537 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
539 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
541 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
542 const bool lhsAligned ( (~lhs).isAligned() );
543 const bool rhsAligned ( (~rhs).isAligned() );
545 const int threads ( omp_get_num_threads() );
546 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
547 const size_t equalShare ( (~lhs).
size() / threads + addon );
548 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
549 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
551 #pragma omp for schedule(dynamic,1) nowait
552 for(
int i=0UL; i<threads; ++i )
554 const size_t index( i*sizePerThread );
556 if( index >= (~lhs).
size() )
559 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
561 if( simdEnabled && lhsAligned && rhsAligned ) {
562 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
563 subAssign( target, subvector<aligned>( ~rhs, index,
size ) );
565 else if( simdEnabled && lhsAligned ) {
566 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
567 subAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
569 else if( simdEnabled && rhsAligned ) {
570 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
571 subAssign( target, subvector<aligned>( ~rhs, index,
size ) );
574 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
575 subAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
599 template<
typename VT1
603 void smpSubAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
609 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
611 const int threads ( omp_get_num_threads() );
612 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
613 const size_t sizePerThread( (~lhs).
size() / threads + addon );
615 #pragma omp for schedule(dynamic,1) nowait
616 for(
int i=0UL; i<threads; ++i )
618 const size_t index( i*sizePerThread );
620 if( index >= (~lhs).
size() )
623 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
624 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
625 subAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
650 template<
typename VT1
654 inline EnableIf_< And< IsDenseVector<VT1>
655 , Or< Not< IsSMPAssignable<VT1> >
656 , Not< IsSMPAssignable<VT2> > > > >
657 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
663 subAssign( ~lhs, ~rhs );
687 template<
typename VT1
691 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
692 smpSubAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
704 subAssign( ~lhs, ~rhs );
707 #pragma omp parallel shared( lhs, rhs )
708 smpSubAssign_backend( ~lhs, ~rhs );
741 template<
typename VT1
745 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
751 typedef ElementType_<VT1> ET1;
752 typedef ElementType_<VT2> ET2;
753 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
754 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
756 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
758 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
759 const bool lhsAligned ( (~lhs).isAligned() );
760 const bool rhsAligned ( (~rhs).isAligned() );
762 const int threads ( omp_get_num_threads() );
763 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
764 const size_t equalShare ( (~lhs).
size() / threads + addon );
765 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
766 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
768 #pragma omp for schedule(dynamic,1) nowait
769 for(
int i=0UL; i<threads; ++i )
771 const size_t index( i*sizePerThread );
773 if( index >= (~lhs).
size() )
776 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
778 if( simdEnabled && lhsAligned && rhsAligned ) {
779 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
780 multAssign( target, subvector<aligned>( ~rhs, index,
size ) );
782 else if( simdEnabled && lhsAligned ) {
783 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
784 multAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
786 else if( simdEnabled && rhsAligned ) {
787 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
788 multAssign( target, subvector<aligned>( ~rhs, index,
size ) );
791 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
792 multAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
817 template<
typename VT1
821 void smpMultAssign_backend( DenseVector<VT1,TF1>& lhs,
const SparseVector<VT2,TF2>& rhs )
827 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
829 const int threads ( omp_get_num_threads() );
830 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
831 const size_t sizePerThread( (~lhs).
size() / threads + addon );
833 #pragma omp for schedule(dynamic,1) nowait
834 for(
int i=0UL; i<threads; ++i )
836 const size_t index( i*sizePerThread );
838 if( index >= (~lhs).
size() )
841 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
842 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
843 multAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
868 template<
typename VT1
872 inline EnableIf_< And< IsDenseVector<VT1>
873 , Or< Not< IsSMPAssignable<VT1> >
874 , Not< IsSMPAssignable<VT2> > > > >
875 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
881 multAssign( ~lhs, ~rhs );
905 template<
typename VT1
909 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
910 smpMultAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
922 multAssign( ~lhs, ~rhs );
925 #pragma omp parallel shared( lhs, rhs )
926 smpMultAssign_backend( ~lhs, ~rhs );
958 template<
typename VT1
962 void smpDivAssign_backend( DenseVector<VT1,TF1>& lhs,
const DenseVector<VT2,TF2>& rhs )
968 typedef ElementType_<VT1> ET1;
969 typedef ElementType_<VT2> ET2;
970 typedef SubvectorExprTrait_<VT1,aligned> AlignedTarget;
971 typedef SubvectorExprTrait_<VT1,unaligned> UnalignedTarget;
973 enum :
size_t { SIMDSIZE = SIMDTrait< ElementType_<VT1> >
::size };
975 const bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSame<ET1,ET2>::value );
976 const bool lhsAligned ( (~lhs).isAligned() );
977 const bool rhsAligned ( (~rhs).isAligned() );
979 const int threads ( omp_get_num_threads() );
980 const size_t addon ( ( ( (~lhs).
size() % threads ) != 0UL )? 1UL : 0UL );
981 const size_t equalShare ( (~lhs).
size() / threads + addon );
982 const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
983 const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
985 #pragma omp for schedule(dynamic,1) nowait
986 for(
int i=0UL; i<threads; ++i )
988 const size_t index( i*sizePerThread );
990 if( index >= (~lhs).
size() )
993 const size_t size(
min( sizePerThread, (~lhs).
size() - index ) );
995 if( simdEnabled && lhsAligned && rhsAligned ) {
996 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
997 divAssign( target, subvector<aligned>( ~rhs, index,
size ) );
999 else if( simdEnabled && lhsAligned ) {
1000 AlignedTarget target( subvector<aligned>( ~lhs, index,
size ) );
1001 divAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
1003 else if( simdEnabled && rhsAligned ) {
1004 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
1005 divAssign( target, subvector<aligned>( ~rhs, index,
size ) );
1008 UnalignedTarget target( subvector<unaligned>( ~lhs, index,
size ) );
1009 divAssign( target, subvector<unaligned>( ~rhs, index,
size ) );
1035 template<
typename VT1
1039 inline EnableIf_< And< IsDenseVector<VT1>
1040 , Or< Not< IsSMPAssignable<VT1> >
1041 , Not< IsSMPAssignable<VT2> > > > >
1042 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
1048 divAssign( ~lhs, ~rhs );
1072 template<
typename VT1
1076 inline EnableIf_< And< IsDenseVector<VT1>, IsSMPAssignable<VT1>, IsSMPAssignable<VT2> > >
1077 smpDivAssign( Vector<VT1,TF1>& lhs,
const Vector<VT2,TF2>& rhs )
1089 divAssign( ~lhs, ~rhs );
1092 #pragma omp parallel shared( lhs, rhs )
1093 smpDivAssign_backend( ~lhs, ~rhs );
Header file for auxiliary alias declarations.
Header file for mathematical functions.
Header file for basic type definitions.
Header file for the SparseVector base class.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
Header file for the IsSame and IsStrictlySame type traits.
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Header file for the DenseVector base class.
Header file for the SIMD trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multiple threads), a compilation error is created.
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the Not class template.
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization.During the shared-memory parallel (SMP...
Definition: ParallelSection.h:246
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for run time assertion macros.
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:213
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization.This compilation switch enables/disables the OpenMP...
Definition: SMP.h:67
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the SubvectorExprTrait class template.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro.In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Constraint on the data type.
Header file for the FunctionTrace class.