DenseVector.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
36 #define _BLAZE_MATH_SMP_OPENMP_DENSEVECTOR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <omp.h>
44 #include <blaze/math/Aliases.h>
60 #include <blaze/system/SMP.h>
62 #include <blaze/util/Assert.h>
63 #include <blaze/util/EnableIf.h>
66 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // OPENMP-BASED ASSIGNMENT KERNELS
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
94 template< typename VT1 // Type of the left-hand side dense vector
95  , bool TF1 // Transpose flag of the left-hand side dense vector
96  , typename VT2 // Type of the right-hand side dense vector
97  , bool TF2 // Transpose flag of the right-hand side dense vector
98  , typename OP > // Type of the assignment operation
99 void openmpAssign( DenseVector<VT1,TF1>& lhs, const DenseVector<VT2,TF2>& rhs, OP op )
100 {
102 
103  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
104 
105  using ET1 = ElementType_t<VT1>;
106  using ET2 = ElementType_t<VT2>;
107 
108  constexpr bool simdEnabled( VT1::simdEnabled && VT2::simdEnabled && IsSIMDCombinable_v<ET1,ET2> );
109  constexpr size_t SIMDSIZE( SIMDTrait< ElementType_t<VT1> >::size );
110 
111  const bool lhsAligned( (~lhs).isAligned() );
112  const bool rhsAligned( (~rhs).isAligned() );
113 
114  const int threads ( omp_get_num_threads() );
115  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
116  const size_t equalShare ( (~lhs).size() / threads + addon );
117  const size_t rest ( equalShare & ( SIMDSIZE - 1UL ) );
118  const size_t sizePerThread( ( simdEnabled && rest )?( equalShare - rest + SIMDSIZE ):( equalShare ) );
119 
120 #pragma omp for schedule(dynamic,1) nowait
121  for( int i=0UL; i<threads; ++i )
122  {
123  const size_t index( i*sizePerThread );
124 
125  if( index >= (~lhs).size() )
126  continue;
127 
128  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
129 
130  if( simdEnabled && lhsAligned && rhsAligned ) {
131  auto target( subvector<aligned>( ~lhs, index, size, unchecked ) );
132  const auto source( subvector<aligned>( ~rhs, index, size, unchecked ) );
133  op( target, source );
134  }
135  else if( simdEnabled && lhsAligned ) {
136  auto target( subvector<aligned>( ~lhs, index, size, unchecked ) );
137  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
138  op( target, source );
139  }
140  else if( simdEnabled && rhsAligned ) {
141  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
142  const auto source( subvector<aligned>( ~rhs, index, size, unchecked ) );
143  op( target, source );
144  }
145  else {
146  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
147  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
148  op( target, source );
149  }
150  }
151 }
153 //*************************************************************************************************
154 
155 
156 //*************************************************************************************************
173 template< typename VT1 // Type of the left-hand side dense vector
174  , bool TF1 // Transpose flag of the left-hand side dense vector
175  , typename VT2 // Type of the right-hand side sparse vector
176  , bool TF2 // Transpose flag of the right-hand side sparse vector
177  , typename OP > // Type of the assignment operation
178 void openmpAssign( DenseVector<VT1,TF1>& lhs, const SparseVector<VT2,TF2>& rhs, OP op )
179 {
181 
182  BLAZE_INTERNAL_ASSERT( isParallelSectionActive(), "Invalid call outside a parallel section" );
183 
184  const int threads ( omp_get_num_threads() );
185  const size_t addon ( ( ( (~lhs).size() % threads ) != 0UL )? 1UL : 0UL );
186  const size_t sizePerThread( (~lhs).size() / threads + addon );
187 
188 #pragma omp for schedule(dynamic,1) nowait
189  for( int i=0UL; i<threads; ++i )
190  {
191  const size_t index( i*sizePerThread );
192 
193  if( index >= (~lhs).size() )
194  continue;
195 
196  const size_t size( min( sizePerThread, (~lhs).size() - index ) );
197  auto target( subvector<unaligned>( ~lhs, index, size, unchecked ) );
198  const auto source( subvector<unaligned>( ~rhs, index, size, unchecked ) );
199  op( target, source );
200  }
201 }
203 //*************************************************************************************************
204 
205 
206 
207 
208 //=================================================================================================
209 //
210 // PLAIN ASSIGNMENT
211 //
212 //=================================================================================================
213 
214 //*************************************************************************************************
232 template< typename VT1 // Type of the left-hand side dense vector
233  , bool TF1 // Transpose flag of the left-hand side dense vector
234  , typename VT2 // Type of the right-hand side vector
235  , bool TF2 > // Transpose flag of the right-hand side vector
236 inline auto smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
237  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
238 {
240 
241  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
242 
243  assign( ~lhs, ~rhs );
244 }
246 //*************************************************************************************************
247 
248 
249 //*************************************************************************************************
267 template< typename VT1 // Type of the left-hand side dense vector
268  , bool TF1 // Transpose flag of the left-hand side dense vector
269  , typename VT2 // Type of the right-hand side vector
270  , bool TF2 > // Transpose flag of the right-hand side vector
271 inline auto smpAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
272  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
273 {
275 
276  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
277  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
278 
279  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
280 
282  {
283  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
284  assign( ~lhs, ~rhs );
285  }
286  else {
287 #pragma omp parallel shared( lhs, rhs )
288  openmpAssign( ~lhs, ~rhs, Assign() );
289  }
290  }
291 }
293 //*************************************************************************************************
294 
295 
296 
297 
298 //=================================================================================================
299 //
300 // ADDITION ASSIGNMENT
301 //
302 //=================================================================================================
303 
304 //*************************************************************************************************
322 template< typename VT1 // Type of the left-hand side dense vector
323  , bool TF1 // Transpose flag of the left-hand side dense vector
324  , typename VT2 // Type of the right-hand side vector
325  , bool TF2 > // Transpose flag of the right-hand side vector
326 inline auto smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
327  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
328 {
330 
331  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
332 
333  addAssign( ~lhs, ~rhs );
334 }
336 //*************************************************************************************************
337 
338 
339 //*************************************************************************************************
357 template< typename VT1 // Type of the left-hand side dense vector
358  , bool TF1 // Transpose flag of the left-hand side dense vector
359  , typename VT2 // Type of the right-hand side vector
360  , bool TF2 > // Transpose flag of the right-hand side vector
361 inline auto smpAddAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
362  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
363 {
365 
366  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
367  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
368 
369  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
370 
372  {
373  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
374  addAssign( ~lhs, ~rhs );
375  }
376  else {
377 #pragma omp parallel shared( lhs, rhs )
378  openmpAssign( ~lhs, ~rhs, AddAssign() );
379  }
380  }
381 }
383 //*************************************************************************************************
384 
385 
386 
387 
388 //=================================================================================================
389 //
390 // SUBTRACTION ASSIGNMENT
391 //
392 //=================================================================================================
393 
394 //*************************************************************************************************
412 template< typename VT1 // Type of the left-hand side dense vector
413  , bool TF1 // Transpose flag of the left-hand side dense vector
414  , typename VT2 // Type of the right-hand side vector
415  , bool TF2 > // Transpose flag of the right-hand side vector
416 inline auto smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
417  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
418 {
420 
421  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
422 
423  subAssign( ~lhs, ~rhs );
424 }
426 //*************************************************************************************************
427 
428 
429 //*************************************************************************************************
447 template< typename VT1 // Type of the left-hand side dense vector
448  , bool TF1 // Transpose flag of the left-hand side dense vector
449  , typename VT2 // Type of the right-hand side vector
450  , bool TF2 > // Transpose flag of the right-hand side vector
451 inline auto smpSubAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
452  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
453 {
455 
456  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
457  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
458 
459  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
460 
462  {
463  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
464  subAssign( ~lhs, ~rhs );
465  }
466  else {
467 #pragma omp parallel shared( lhs, rhs )
468  openmpAssign( ~lhs, ~rhs, SubAssign() );
469  }
470  }
471 }
473 //*************************************************************************************************
474 
475 
476 
477 
478 //=================================================================================================
479 //
480 // MULTIPLICATION ASSIGNMENT
481 //
482 //=================================================================================================
483 
484 //*************************************************************************************************
502 template< typename VT1 // Type of the left-hand side dense vector
503  , bool TF1 // Transpose flag of the left-hand side dense vector
504  , typename VT2 // Type of the right-hand side vector
505  , bool TF2 > // Transpose flag of the right-hand side vector
506 inline auto smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
507  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
508 {
510 
511  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
512 
513  multAssign( ~lhs, ~rhs );
514 }
516 //*************************************************************************************************
517 
518 
519 //*************************************************************************************************
537 template< typename VT1 // Type of the left-hand side dense vector
538  , bool TF1 // Transpose flag of the left-hand side dense vector
539  , typename VT2 // Type of the right-hand side vector
540  , bool TF2 > // Transpose flag of the right-hand side vector
541 inline auto smpMultAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
542  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
543 {
545 
546  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
547  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
548 
549  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
550 
552  {
553  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
554  multAssign( ~lhs, ~rhs );
555  }
556  else {
557 #pragma omp parallel shared( lhs, rhs )
558  openmpAssign( ~lhs, ~rhs, MultAssign() );
559  }
560  }
561 }
563 //*************************************************************************************************
564 
565 
566 
567 
568 //=================================================================================================
569 //
570 // DIVISION ASSIGNMENT
571 //
572 //=================================================================================================
573 
574 //*************************************************************************************************
592 template< typename VT1 // Type of the left-hand side dense vector
593  , bool TF1 // Transpose flag of the left-hand side dense vector
594  , typename VT2 // Type of the right-hand side vector
595  , bool TF2 > // Transpose flag of the right-hand side vector
596 inline auto smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
597  -> EnableIf_t< IsDenseVector_v<VT1> && ( !IsSMPAssignable_v<VT1> || !IsSMPAssignable_v<VT2> ) >
598 {
600 
601  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
602 
603  divAssign( ~lhs, ~rhs );
604 }
606 //*************************************************************************************************
607 
608 
609 //*************************************************************************************************
627 template< typename VT1 // Type of the left-hand side dense vector
628  , bool TF1 // Transpose flag of the left-hand side dense vector
629  , typename VT2 // Type of the right-hand side vector
630  , bool TF2 > // Transpose flag of the right-hand side vector
631 inline auto smpDivAssign( Vector<VT1,TF1>& lhs, const Vector<VT2,TF2>& rhs )
632  -> EnableIf_t< IsDenseVector_v<VT1> && IsSMPAssignable_v<VT1> && IsSMPAssignable_v<VT2> >
633 {
635 
636  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT1> );
637  BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE( ElementType_t<VT2> );
638 
639  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
640 
642  {
643  if( isSerialSectionActive() || !(~rhs).canSMPAssign() ) {
644  divAssign( ~lhs, ~rhs );
645  }
646  else {
647 #pragma omp parallel shared( lhs, rhs )
648  openmpAssign( ~lhs, ~rhs, DivAssign() );
649  }
650  }
651 }
653 //*************************************************************************************************
654 
655 
656 
657 
658 //=================================================================================================
659 //
660 // COMPILE TIME CONSTRAINTS
661 //
662 //=================================================================================================
663 
664 //*************************************************************************************************
666 namespace {
667 
669 
670 }
672 //*************************************************************************************************
673 
674 } // namespace blaze
675 
676 #endif
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Header file for basic type definitions.
Header file for the SparseVector base class.
Header file for the Assign functor.
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the AddAssign functor.
Header file for the DenseVector base class.
Header file for the SIMD trait.
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:220
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SMP_ASSIGNABLE(T)
Constraint on the data type. In case the given data type T is SMP-assignable (can be assigned by multi...
Definition: SMPAssignable.h:81
Header file for the implementation of the Subvector view.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the SubAssign functor.
Compile time assertion.
System settings for the shared-memory parallelization.
Header file for the IsSMPAssignable type trait.
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
Header file for the serial section implementation.
Header file for the parallel section implementation.
Header file for the EnableIf class template.
#define BLAZE_PARALLEL_SECTION
Section for the debugging of the shared-memory parallelization. During the shared-memory parallel (SMP...
Definition: ParallelSection.h:254
bool isSerialSectionActive()
Returns whether a serial section is active or not.
Definition: SerialSection.h:213
Header file for the IsSIMDCombinable type trait.
Header file for run time assertion macros.
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Header file for the IsDenseVector type trait.
bool isParallelSectionActive()
Returns whether a parallel section is active or not.
Definition: ParallelSection.h:221
Header file for the MultAssign functor.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:162
#define BLAZE_OPENMP_PARALLEL_MODE
Compilation switch for the OpenMP parallelization. This compilation switch enables/disables the OpenMP...
Definition: SMP.h:68
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:191
Header file for the DivAssign functor.
Constraint on the data type.
Header file for the function trace functionality.