Blaze  3.6
DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
44 #include <blaze/math/Aliases.h>
55 #include <blaze/system/SMP.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/Types.h>
62 
63 
64 namespace blaze {
65 
66 //=================================================================================================
67 //
68 // OPENMP-BASED ASSIGNMENT KERNELS
69 //
70 //=================================================================================================
71 
72 //*************************************************************************************************
89 template< typename VT1 // Type of the left-hand side dense vector
90  , bool TF1 // Transpose flag of the left-hand side dense vector
91  , typename VT2 // Type of the right-hand side dense vector
92  , bool TF2 // Transpose flag of the right-hand side dense vector
93  , typename OP > // Type of the assignment operation
94 void openmpAssign( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs, OP op )
95 {
97 
98  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
99 
100  using ET1 = ElementType_t<VT1>;
101  using ET2 = ElementType_t<VT2>;
102 
103  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
104  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::size );
105 
106  const bool lhsAligned( (~lhs).isAligned() );
107  const bool rhsAligned( (~rhs).isAligned() );
108 
109  const int threads ( omp_get_num_threads() );
110  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
111  const size_t equalShare ( (~lhs).size() / threads + addon );
112  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
113  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
114 
115 #pragma omp for schedule(dynamic,1) nowait
116  for( int i=0UL; i<threads; ++i )
117  {
118  const size_t index( i*sizePerThread );
119 
120  if( index >= (~lhs).size() )
121  continue;
122 
123  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
124 
125  if( simdEnabled && lhsAligned && rhsAligned ) {
126  auto target( subvector<aligned>( ~lhs, index, size, unchecked ) );
127  const auto source( subvector<aligned>( ~rhs, index, size, unchecked ) );
128  op( target, source );
129  }
130  else if( simdEnabled && lhsAligned ) {
131  auto target( subvector<aligned>( ~lhs, index, size, unchecked ) );
132  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
133  op( target, source );
134  }
135  else if( simdEnabled && rhsAligned ) {
136  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
137  const auto source( subvector<aligned>( ~rhs, index, size, unchecked ) );
138  op( target, source );
139  }
140  else {
141  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
142  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
143  op( target, source );
144  }
145  }
146 }
148 //*************************************************************************************************
149 
150 
151 //*************************************************************************************************
168 template< typename VT1 // Type of the left-hand side dense vector
169  , bool TF1 // Transpose flag of the left-hand side dense vector
170  , typename VT2 // Type of the right-hand side sparse vector
171  , bool TF2 // Transpose flag of the right-hand side sparse vector
172  , typename OP > // Type of the assignment operation
173 void openmpAssign( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs, OP op )
174 {
176 
177  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
178 
179  const int threads ( omp_get_num_threads() );
180  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
181  const size_t sizePerThread( (~lhs).size() / threads + addon );
182 
183 #pragma omp for schedule(dynamic,1) nowait
184  for( int i=0UL; i<threads; ++i )
185  {
186  const size_t index( i*sizePerThread );
187 
188  if( index >= (~lhs).size() )
189  continue;
190 
191  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
192  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
193  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
194  op( target, source );
195  }
196 }
198 //*************************************************************************************************
199 
200 
201 
202 
203 //=================================================================================================
204 //
205 // PLAIN ASSIGNMENT
206 //
207 //=================================================================================================
208 
209 //*************************************************************************************************
227 template< typename VT1 // Type of the left-hand side dense vector
228  , bool TF1 // Transpose flag of the left-hand side dense vector
229  , typename VT2 // Type of the right-hand side vector
230  , bool TF2 > // Transpose flag of the right-hand side vector
231 inline auto smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
232  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
233 {
235 
236  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
237 
238  assign( ~lhs, ~rhs );
239 }
241 //*************************************************************************************************
242 
243 
244 //*************************************************************************************************
262 template< typename VT1 // Type of the left-hand side dense vector
263  , bool TF1 // Transpose flag of the left-hand side dense vector
264  , typename VT2 // Type of the right-hand side vector
265  , bool TF2 > // Transpose flag of the right-hand side vector
266 inline auto smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
267  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
268 {
270 
271  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
272  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
273 
274  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
275 
277  {
278  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
279  assign( ~lhs, ~rhs );
280  }
281  else {
282 #pragma omp parallel shared( lhs, rhs )
283  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ assign( a, b ); } );
284  }
285  }
286 }
288 //*************************************************************************************************
289 
290 
291 
292 
293 //=================================================================================================
294 //
295 // ADDITION ASSIGNMENT
296 //
297 //=================================================================================================
298 
299 //*************************************************************************************************
317 template< typename VT1 // Type of the left-hand side dense vector
318  , bool TF1 // Transpose flag of the left-hand side dense vector
319  , typename VT2 // Type of the right-hand side vector
320  , bool TF2 > // Transpose flag of the right-hand side vector
321 inline auto smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
322  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
323 {
325 
326  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
327 
328  addAssign( ~lhs, ~rhs );
329 }
331 //*************************************************************************************************
332 
333 
334 //*************************************************************************************************
352 template< typename VT1 // Type of the left-hand side dense vector
353  , bool TF1 // Transpose flag of the left-hand side dense vector
354  , typename VT2 // Type of the right-hand side vector
355  , bool TF2 > // Transpose flag of the right-hand side vector
356 inline auto smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
357  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
358 {
360 
361  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
362  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
363 
364  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
365 
367  {
368  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
369  addAssign( ~lhs, ~rhs );
370  }
371  else {
372 #pragma omp parallel shared( lhs, rhs )
373  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ addAssign( a, b ); } );
374  }
375  }
376 }
378 //*************************************************************************************************
379 
380 
381 
382 
383 //=================================================================================================
384 //
385 // SUBTRACTION ASSIGNMENT
386 //
387 //=================================================================================================
388 
389 //*************************************************************************************************
407 template< typename VT1 // Type of the left-hand side dense vector
408  , bool TF1 // Transpose flag of the left-hand side dense vector
409  , typename VT2 // Type of the right-hand side vector
410  , bool TF2 > // Transpose flag of the right-hand side vector
411 inline auto smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
412  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
413 {
415 
416  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
417 
418  subAssign( ~lhs, ~rhs );
419 }
421 //*************************************************************************************************
422 
423 
424 //*************************************************************************************************
442 template< typename VT1 // Type of the left-hand side dense vector
443  , bool TF1 // Transpose flag of the left-hand side dense vector
444  , typename VT2 // Type of the right-hand side vector
445  , bool TF2 > // Transpose flag of the right-hand side vector
446 inline auto smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
447  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
448 {
450 
451  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
452  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
453 
454  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
455 
457  {
458  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
459  subAssign( ~lhs, ~rhs );
460  }
461  else {
462 #pragma omp parallel shared( lhs, rhs )
463  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ subAssign( a, b ); } );
464  }
465  }
466 }
468 //*************************************************************************************************
469 
470 
471 
472 
473 //=================================================================================================
474 //
475 // MULTIPLICATION ASSIGNMENT
476 //
477 //=================================================================================================
478 
479 //*************************************************************************************************
497 template< typename VT1 // Type of the left-hand side dense vector
498  , bool TF1 // Transpose flag of the left-hand side dense vector
499  , typename VT2 // Type of the right-hand side vector
500  , bool TF2 > // Transpose flag of the right-hand side vector
501 inline auto smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
502  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
503 {
505 
506  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
507 
508  multAssign( ~lhs, ~rhs );
509 }
511 //*************************************************************************************************
512 
513 
514 //*************************************************************************************************
532 template< typename VT1 // Type of the left-hand side dense vector
533  , bool TF1 // Transpose flag of the left-hand side dense vector
534  , typename VT2 // Type of the right-hand side vector
535  , bool TF2 > // Transpose flag of the right-hand side vector
536 inline auto smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
537  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
538 {
540 
541  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
542  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
543 
544  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
545 
547  {
548  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
549  multAssign( ~lhs, ~rhs );
550  }
551  else {
552 #pragma omp parallel shared( lhs, rhs )
553  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ multAssign( a, b ); } );
554  }
555  }
556 }
558 //*************************************************************************************************
559 
560 
561 
562 
563 //=================================================================================================
564 //
565 // DIVISION ASSIGNMENT
566 //
567 //=================================================================================================
568 
569 //*************************************************************************************************
587 template< typename VT1 // Type of the left-hand side dense vector
588  , bool TF1 // Transpose flag of the left-hand side dense vector
589  , typename VT2 // Type of the right-hand side vector
590  , bool TF2 > // Transpose flag of the right-hand side vector
591 inline auto smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
592  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
593 {
595 
596  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
597 
598  divAssign( ~lhs, ~rhs );
599 }
601 //*************************************************************************************************
602 
603 
604 //*************************************************************************************************
622 template< typename VT1 // Type of the left-hand side dense vector
623  , bool TF1 // Transpose flag of the left-hand side dense vector
624  , typename VT2 // Type of the right-hand side vector
625  , bool TF2 > // Transpose flag of the right-hand side vector
626 inline auto smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
627  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
628 {
630 
631  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
632  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
633 
634  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
635 
637  {
638  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
639  divAssign( ~lhs, ~rhs );
640  }
641  else {
642 #pragma omp parallel shared( lhs, rhs )
643  openmpAssign( ~lhs, ~rhs, []( auto& a, const auto& b ){ divAssign( a, b ); } );
644  }
645  }
646 }
648 //*************************************************************************************************
649 
650 
651 
652 
653 //=================================================================================================
654 //
655 // COMPILE TIME CONSTRAINTS
656 //
657 //=================================================================================================
658 
659 //*************************************************************************************************
661 namespace {
662 
// NOTE(review): this unnamed namespace appears empty here, but the embedded listing numbering
// skips a line (662 -> 664), so a declaration may be missing from this view -- confirm against
// the upstream header.
664 
665 }
667 //*************************************************************************************************
668 
669 } // namespace blaze
670 
671 #endif
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression,...
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:191
Constraint on the data type.
Header file for the function trace functionality.