TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemv.h>
44 #include <blaze/math/blas/trmv.h>
45 #include <blaze/math/Aliases.h>
53 #include <blaze/math/Exception.h>
59 #include <blaze/math/shims/Reset.h>
61 #include <blaze/math/SIMD.h>
82 #include <blaze/math/views/Check.h>
83 #include <blaze/system/BLAS.h>
88 #include <blaze/util/Assert.h>
89 #include <blaze/util/Complex.h>
91 #include <blaze/util/DisableIf.h>
92 #include <blaze/util/EnableIf.h>
94 #include <blaze/util/mpl/And.h>
95 #include <blaze/util/mpl/If.h>
96 #include <blaze/util/Types.h>
104 
105 
106 namespace blaze {
107 
108 //=================================================================================================
109 //
110 // CLASS TDMATDVECMULTEXPR
111 //
112 //=================================================================================================
113 
114 //*************************************************************************************************
121 template< typename MT // Type of the left-hand side dense matrix
122  , typename VT > // Type of the right-hand side dense vector
123 class TDMatDVecMultExpr
124  : public MatVecMultExpr< DenseVector< TDMatDVecMultExpr<MT,VT>, false > >
125  , private Computation
126 {
127  private:
128  //**Type definitions****************************************************************************
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
// Compile-time flag 'evaluateMatrix'. The visible part of the condition requires that MT is a
// computation (IsComputation<MT>) and that MET and VET are the same type (IsSame<MET,VET>).
// NOTE(review): the declaration is truncated in this listing (it ends in '&&' and the closing
// part on listing line 140 is absent); take the remaining condition from the original header.
139  enum : bool { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
141  //**********************************************************************************************
142 
143  //**********************************************************************************************
// Compile-time flag 'evaluateVector': true if the vector operand type VT is a computation
// (IsComputation<VT>) or is reported by RequiresEvaluation<VT> as requiring evaluation.
145  enum : bool { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
146  //**********************************************************************************************
147 
148  //**********************************************************************************************
150 
// Helper trait whose 'value' is true whenever at least one of the two operands is flagged for
// evaluation (evaluateMatrix or evaluateVector). The template parameter T1 does not take part
// in the condition; it only makes the trait a dependent type.
// NOTE(review): presumably used to select the SMP assignment overloads, which are not part of
// this section of the file - confirm against the remainder of the header.
154  template< typename T1 >
155  struct UseSMPAssign {
156  enum : bool { value = ( evaluateMatrix || evaluateVector ) };
157  };
159  //**********************************************************************************************
160 
161  //**********************************************************************************************
163 
// Helper trait over three types T1, T2, T3. The visible part of the condition requires that all
// three types are SIMD enabled and that T1 and T3 have the same element type.
// NOTE(review): listing lines 168-172 and 174-177 are absent here, so the opening of the
// 'value' enumerator and several further conditions are not shown; consult the original header
// before relying on or modifying this trait.
166  template< typename T1, typename T2, typename T3 >
167  struct UseBlasKernel {
173  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
178  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
179  };
181  //**********************************************************************************************
182 
183  //**********************************************************************************************
185 
// Helper trait over three types T1, T2, T3. As far as visible, 'value' requires the global
// switch 'useOptimizedKernels' and that all three types are SIMD enabled, plus a condition on
// the element types that is only partially shown.
// NOTE(review): listing lines 192, 194-195 and 197-198 are absent, so the condition is
// incomplete in this listing; consult the original header for the full expression.
189  template< typename T1, typename T2, typename T3 >
190  struct UseVectorizedDefaultKernel {
191  enum : bool { value = useOptimizedKernels &&
193  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
196  , ElementType_<T3> >::value &&
199  };
201  //**********************************************************************************************
202 
203  public:
204  //**Type definitions****************************************************************************
// ReturnType is the type returned by operator[] and at(): a const ElementType.
// CompositeType is a const ResultType.
// (ElementType and ResultType are declared in a part of the class not shown in this listing.)
210  using ReturnType = const ElementType;
211  using CompositeType = const ResultType;
212 
// Type returned by leftOperand(): MT by value if MT is an expression (IsExpression<MT>),
// otherwise a reference to const MT.
214  using LeftOperand = If_< IsExpression<MT>, const MT, const MT& >;
215 
// Type returned by rightOperand(): VT by value if VT is an expression (IsExpression<VT>),
// otherwise a reference to const VT.
217  using RightOperand = If_< IsExpression<VT>, const VT, const VT& >;
218 
221 
224  //**********************************************************************************************
225 
226  //**Compilation flags***************************************************************************
// Compilation flag 'simdEnabled'. The visible part of the condition requires that MT is not a
// diagonal matrix and that both operand types are SIMD enabled.
// NOTE(review): the declaration is truncated in this listing (listing lines 230-231 are absent).
228  enum : bool { simdEnabled = !IsDiagonal<MT>::value &&
229  MT::simdEnabled && VT::simdEnabled &&
232 
// Compilation flag 'smpAssignable': true only if neither operand is flagged for evaluation and
// both operand types are themselves SMP assignable.
234  enum : bool { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
235  !evaluateVector && VT::smpAssignable };
236  //**********************************************************************************************
237 
238  //**SIMD properties*****************************************************************************
// Size value reported by SIMDTrait for ElementType; the vectorized kernels advance their row
// index in steps of SIMDSIZE.
240  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
241  //**********************************************************************************************
242 
243  //**Constructor*********************************************************************************
// Constructor: stores the two operands of the matrix/vector multiplication expression.
//  mat  The left-hand side matrix operand.
//  vec  The right-hand side vector operand.
// The operand sizes are not validated at run time; a mismatch between the number of matrix
// columns and the vector size is only caught by the internal assertion below.
249  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec ) noexcept
250  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
251  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
252  {
253  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
254  }
255  //**********************************************************************************************
256 
257  //**Subscript operator**************************************************************************
263  inline ReturnType operator[]( size_t index ) const {
264  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
265 
267  {
268  return mat_(index,index) * vec_[index];
269  }
270  else if( IsLower<MT>::value && ( index + 8UL < mat_.rows() ) )
271  {
272  const size_t n( IsStrictlyLower<MT>::value ? index : index+1UL );
273  return subvector( row( mat_, index, unchecked ), 0UL, n, unchecked ) *
274  subvector( vec_, 0UL, n, unchecked );
275  }
276  else if( IsUpper<MT>::value && ( index > 8UL ) )
277  {
278  const size_t begin( IsStrictlyUpper<MT>::value ? index+1UL : index );
279  const size_t n ( mat_.columns() - begin );
280  return subvector( row( mat_, index, unchecked ), begin, n, unchecked ) *
281  subvector( vec_, begin, n, unchecked );
282  }
283  else
284  {
285  return row( mat_, index, unchecked ) * vec_;
286  }
287  }
288  //**********************************************************************************************
289 
290  //**At function*********************************************************************************
297  inline ReturnType at( size_t index ) const {
298  if( index >= mat_.rows() ) {
299  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
300  }
301  return (*this)[index];
302  }
303  //**********************************************************************************************
304 
305  //**Size function*******************************************************************************
// Returns the size of the result vector, which is the number of rows of the matrix operand.
310  inline size_t size() const noexcept {
311  return mat_.rows();
312  }
313  //**********************************************************************************************
314 
315  //**Left operand access*************************************************************************
// Returns the left-hand side (matrix) operand of the multiplication expression.
320  inline LeftOperand leftOperand() const noexcept {
321  return mat_;
322  }
323  //**********************************************************************************************
324 
325  //**Right operand access************************************************************************
// Returns the right-hand side (vector) operand of the multiplication expression.
330  inline RightOperand rightOperand() const noexcept {
331  return vec_;
332  }
333  //**********************************************************************************************
334 
335  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
//  alias  The address to be checked.
// Returns true if either operand reports via isAliased() that it is aliased with 'alias'.
// Note that this queries the operands' isAliased() (not canAlias()), i.e. it evaluates the same
// condition as isAliased() of this class.
341  template< typename T >
342  inline bool canAlias( const T* alias ) const noexcept {
343  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//  alias  The address to be checked.
// Returns true if the matrix operand or the vector operand reports being aliased with 'alias'.
353  template< typename T >
354  inline bool isAliased( const T* alias ) const noexcept {
355  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
// Returns true only if both the matrix operand and the vector operand report being aligned.
364  inline bool isAligned() const noexcept {
365  return mat_.isAligned() && vec_.isAligned();
366  }
367  //**********************************************************************************************
368 
369  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// As visible here, the result is true only if the result vector is larger than
// SMP_TDMATDVECMULT_THRESHOLD and, in addition, at least one of the following holds:
//  - BLAS mode is disabled (BLAZE_BLAS_MODE evaluates to false),
//  - the matrix operand is a computation that is not evaluated (!evaluateMatrix),
//  - the matrix has fewer than TDMATDVECMULT_THRESHOLD elements (rows * columns).
// NOTE(review): listing lines 376-377 are absent between the first and second alternative, so
// further alternatives of the disjunction may be missing from this listing - confirm against
// the original header.
374  inline bool canSMPAssign() const noexcept {
375  return ( !BLAZE_BLAS_MODE ||
378  ( IsComputation<MT>::value && !evaluateMatrix ) ||
379  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
380  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
381  }
382  //**********************************************************************************************
383 
384  private:
385  //**Member variables****************************************************************************
388  //**********************************************************************************************
389 
390  //**Assignment to dense vectors*****************************************************************
// Assignment of the matrix/vector multiplication expression to a dense vector.
//  lhs  The target left-hand side dense vector.
//  rhs  The right-hand side multiplication expression to be assigned.
// The sizes of lhs and rhs are expected to match (checked by internal assertion only).
// NOTE(review): listing line 406 is absent at the start of the body, and the operand types LT
// and RT are declared in a part of the class not shown in this listing.
403  template< typename VT1 > // Type of the target dense vector
404  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
405  {
407 
408  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
409 
// Degenerate cases: a matrix without rows yields an empty result (nothing to do); a matrix
// without columns yields a result consisting of default (reset) elements only.
410  if( rhs.mat_.rows() == 0UL ) {
411  return;
412  }
413  else if( rhs.mat_.columns() == 0UL ) {
414  reset( ~lhs );
415  return;
416  }
417 
// Both operands are passed through serial() before being bound to the LT/RT objects.
418  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
419  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
420 
421  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
422  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
423  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
424  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
425 
// Dispatch to the kernel selection with the (possibly evaluated) operands.
426  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
427  }
429  //**********************************************************************************************
430 
431  //**Assignment to dense vectors (kernel selection)**********************************************
442  template< typename VT1 // Type of the left-hand side target vector
443  , typename MT1 // Type of the left-hand side matrix operand
444  , typename VT2 > // Type of the right-hand side vector operand
445  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
446  {
447  if( ( IsDiagonal<MT1>::value ) ||
448  ( IsComputation<MT>::value && !evaluateMatrix ) ||
449  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
450  selectSmallAssignKernel( y, A, x );
451  else
452  selectBlasAssignKernel( y, A, x );
453  }
455  //**********************************************************************************************
456 
457  //**Default assignment to dense vectors*********************************************************
471  template< typename VT1 // Type of the left-hand side target vector
472  , typename MT1 // Type of the left-hand side matrix operand
473  , typename VT2 > // Type of the right-hand side vector operand
474  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
475  {
476  const size_t M( A.rows() );
477  const size_t N( A.columns() );
478 
480  reset( y[0] );
481  }
482 
483  if( !IsUpper<MT1>::value )
484  {
485  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
486  y[i] = A(i,0UL) * x[0UL];
487  }
488  }
489 
490  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
491  {
493  {
494  y[j] = A(j,j) * x[j];
495  }
496  else
497  {
498  const size_t ibegin( ( IsLower<MT1>::value )
499  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
500  :( 0UL ) );
501  const size_t iend( ( IsUpper<MT1>::value )
502  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
503  :( M ) );
504  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
505 
506  const size_t inum( iend - ibegin );
507  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
508 
509  for( size_t i=ibegin; i<ipos; i+=2UL ) {
510  y[i ] += A(i ,j) * x[j];
511  y[i+1UL] += A(i+1UL,j) * x[j];
512  }
513  if( ipos < iend ) {
514  y[ipos] += A(ipos,j) * x[j];
515  }
516  if( IsUpper<MT1>::value ) {
517  y[iend] = A(iend,j) * x[j];
518  }
519  }
520  }
521 
523  reset( y[M-1UL] );
524  }
525  }
527  //**********************************************************************************************
528 
529  //**Default assignment to dense vectors (small matrices)****************************************
// Default assignment kernel for small matrices: forwards unchanged to
// selectDefaultAssignKernel().
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// NOTE(review): listing line 546 (the declaration specifiers / return type) is absent here. As
// a second overload with the same parameter list follows, the missing line presumably carries
// the compile-time condition that distinguishes the two overloads - confirm against the
// original header.
543  template< typename VT1 // Type of the left-hand side target vector
544  , typename MT1 // Type of the left-hand side matrix operand
545  , typename VT2 > // Type of the right-hand side vector operand
547  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
548  {
549  selectDefaultAssignKernel( y, A, x );
550  }
552  //**********************************************************************************************
553 
554  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized assignment kernel for small matrices (y = A * x).
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// The rows of the result are processed in groups of 8, 4, 3, 2 and 1 SIMD packs (SIMDSIZE rows
// per pack), followed by a scalar loop for the rows that do not fill a complete pack. For each
// group the product is accumulated over the columns [jbegin,jend) and then stored to y, i.e.
// every element of y is written exactly once and y does not have to be initialized.
// NOTE(review): listing line 571 (the declaration specifiers / return type) is absent here; it
// presumably carries the compile-time condition that selects this overload over the default
// kernel of the same name - confirm against the original header.
568  template< typename VT1 // Type of the left-hand side target vector
569  , typename MT1 // Type of the left-hand side matrix operand
570  , typename VT2 > // Type of the right-hand side vector operand
572  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
573  {
// A scalar remainder loop is needed unless both the matrix and the target vector are padded.
574  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
575 
576  const size_t M( A.rows() );
577  const size_t N( A.columns() );
578 
// End of the rows handled by SIMD packs: M rounded down to a multiple of SIMDSIZE if a
// remainder loop is used (the assertion below checks the rounding), otherwise M itself.
579  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
580  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
581 
582  size_t i( 0UL );
583 
// Groups of 8 SIMD packs. The column range is restricted for structured matrices: upper
// matrices start at the first row of the group (plus one if strictly upper), lower matrices
// end at the last row of the group (minus one if strictly lower), clamped to N.
// NOTE(review): the accumulators are default constructed and then accumulated into, which
// presumes that a default constructed SIMDType is zero - confirm against the SIMD types.
584  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
585  {
586  const size_t jbegin( ( IsUpper<MT1>::value )
587  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
588  :( 0UL ) );
589  const size_t jend( ( IsLower<MT1>::value )
590  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
591  :( N ) );
592  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
593 
594  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
595 
596  for( size_t j=jbegin; j<jend; ++j ) {
597  const SIMDType x1( set( x[j] ) );
598  xmm1 += A.load(i ,j) * x1;
599  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
600  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
601  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
602  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
603  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
604  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
605  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
606  }
607 
608  y.store( i , xmm1 );
609  y.store( i+SIMDSIZE , xmm2 );
610  y.store( i+SIMDSIZE*2UL, xmm3 );
611  y.store( i+SIMDSIZE*3UL, xmm4 );
612  y.store( i+SIMDSIZE*4UL, xmm5 );
613  y.store( i+SIMDSIZE*5UL, xmm6 );
614  y.store( i+SIMDSIZE*6UL, xmm7 );
615  y.store( i+SIMDSIZE*7UL, xmm8 );
616  }
617 
// Groups of 4 SIMD packs (same scheme as above).
618  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
619  {
620  const size_t jbegin( ( IsUpper<MT1>::value )
621  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
622  :( 0UL ) );
623  const size_t jend( ( IsLower<MT1>::value )
624  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
625  :( N ) );
626  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
627 
628  SIMDType xmm1, xmm2, xmm3, xmm4;
629 
630  for( size_t j=jbegin; j<jend; ++j ) {
631  const SIMDType x1( set( x[j] ) );
632  xmm1 += A.load(i ,j) * x1;
633  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
634  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
635  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
636  }
637 
638  y.store( i , xmm1 );
639  y.store( i+SIMDSIZE , xmm2 );
640  y.store( i+SIMDSIZE*2UL, xmm3 );
641  y.store( i+SIMDSIZE*3UL, xmm4 );
642  }
643 
// Groups of 3 SIMD packs.
644  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
645  {
646  const size_t jbegin( ( IsUpper<MT1>::value )
647  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
648  :( 0UL ) );
649  const size_t jend( ( IsLower<MT1>::value )
650  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
651  :( N ) );
652  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
653 
654  SIMDType xmm1, xmm2, xmm3;
655 
656  for( size_t j=jbegin; j<jend; ++j ) {
657  const SIMDType x1( set( x[j] ) );
658  xmm1 += A.load(i ,j) * x1;
659  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
660  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
661  }
662 
663  y.store( i , xmm1 );
664  y.store( i+SIMDSIZE , xmm2 );
665  y.store( i+SIMDSIZE*2UL, xmm3 );
666  }
667 
// Groups of 2 SIMD packs.
668  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
669  {
670  const size_t jbegin( ( IsUpper<MT1>::value )
671  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
672  :( 0UL ) );
673  const size_t jend( ( IsLower<MT1>::value )
674  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
675  :( N ) );
676  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
677 
678  SIMDType xmm1, xmm2;
679 
680  for( size_t j=jbegin; j<jend; ++j ) {
681  const SIMDType x1( set( x[j] ) );
682  xmm1 += A.load(i ,j) * x1;
683  xmm2 += A.load(i+SIMDSIZE,j) * x1;
684  }
685 
686  y.store( i , xmm1 );
687  y.store( i+SIMDSIZE, xmm2 );
688  }
689 
// Single SIMD packs.
690  for( ; i<ipos; i+=SIMDSIZE )
691  {
692  const size_t jbegin( ( IsUpper<MT1>::value )
693  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
694  :( 0UL ) );
695  const size_t jend( ( IsLower<MT1>::value )
696  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
697  :( N ) );
698  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
699 
700  SIMDType xmm1;
701 
702  for( size_t j=jbegin; j<jend; ++j ) {
703  xmm1 += A.load(i,j) * set( x[j] );
704  }
705 
706  y.store( i, xmm1 );
707  }
708 
// Scalar loop for the remaining rows [ipos,M); only executed if a remainder is possible.
// The accumulator is value-initialized and assigned to y[i] after the column loop.
709  for( ; remainder && i<M; ++i )
710  {
711  const size_t jbegin( ( IsUpper<MT1>::value )
712  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
713  :( 0UL ) );
714  const size_t jend( ( IsLower<MT1>::value )
715  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
716  :( N ) );
717  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
718 
719  ElementType value{};
720 
721  for( size_t j=jbegin; j<jend; ++j ) {
722  value += A(i,j) * x[j];
723  }
724 
725  y[i] = value;
726  }
727  }
729  //**********************************************************************************************
730 
731  //**Default assignment to dense vectors (large matrices)****************************************
// Default assignment kernel for large matrices: forwards unchanged to
// selectDefaultAssignKernel().
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// NOTE(review): listing line 748 (the declaration specifiers / return type) is absent here; it
// presumably carries the compile-time condition that distinguishes this overload from the
// vectorized overload of the same name - confirm against the original header.
745  template< typename VT1 // Type of the left-hand side target vector
746  , typename MT1 // Type of the left-hand side matrix operand
747  , typename VT2 > // Type of the right-hand side vector operand
749  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
750  {
751  selectDefaultAssignKernel( y, A, x );
752  }
754  //**********************************************************************************************
755 
756  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized assignment kernel for large matrices (y = A * x).
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// In contrast to the small-matrix kernel the matrix is traversed in blocks of 'iblock' rows and
// 'jblock' columns. Since a row of the result receives contributions from several column
// blocks, y is reset first and every block adds its partial products to the current content
// of y.
// NOTE(review): listing line 773 (the declaration specifiers / return type) is absent here; it
// presumably carries the compile-time condition that selects this overload over the default
// kernel of the same name - confirm against the original header.
770  template< typename VT1 // Type of the left-hand side target vector
771  , typename MT1 // Type of the left-hand side matrix operand
772  , typename VT2 > // Type of the right-hand side vector operand
774  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
775  {
// A scalar remainder loop is needed unless both the matrix and the target vector are padded.
776  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
777 
778  const size_t M( A.rows() );
779  const size_t N( A.columns() );
780 
// Row block size: the number of elements that fit into 32768 bytes. Column block size: 8
// columns if the matrix has fewer columns than a row block has rows, otherwise 4.
// NOTE(review): 32768 presumably reflects a cache size - the rationale is not stated here.
781  const size_t iblock( 32768UL / sizeof( ElementType ) );
782  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
783 
784  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
785 
786  reset( y );
787 
788  for( size_t ii=0U; ii<M; ii+=iblock ) {
789  for( size_t jj=0UL; jj<N; jj+=jblock )
790  {
// Limits of the current block. For upper matrices the row range ends at the last column of
// the block (exclusive of the diagonal if strictly upper).
791  const size_t jend( min( jj+jblock, N ) );
792  const size_t itmp( min( ii+iblock, M ) );
793  const size_t iend( ( IsUpper<MT1>::value )
794  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
795  :( itmp ) );
796 
// End of the rows handled by SIMD packs within this block (rounded down to a multiple of
// SIMDSIZE if a remainder loop is used).
797  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
798  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
799 
// First row of the block. For lower matrices the rows above the first column of the block are
// skipped; the start is rounded down to a multiple of SIMDSIZE and never lies before 'ii'.
800  size_t i( ( IsLower<MT1>::value )
801  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
802  :( ii ) );
803 
// Groups of 8 SIMD packs: accumulate over the columns of the block, then add to y.
// NOTE(review): the accumulators are default constructed and then accumulated into, which
// presumes that a default constructed SIMDType is zero - confirm against the SIMD types.
804  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
805  {
806  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
807 
808  for( size_t j=jj; j<jend; ++j ) {
809  const SIMDType x1( set( x[j] ) );
810  xmm1 += A.load(i ,j) * x1;
811  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
812  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
813  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
814  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
815  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
816  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
817  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
818  }
819 
820  y.store( i , y.load(i ) + xmm1 );
821  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
822  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
823  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
824  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5 );
825  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6 );
826  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7 );
827  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8 );
828  }
829 
// Groups of 4 SIMD packs.
830  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
831  {
832  SIMDType xmm1, xmm2, xmm3, xmm4;
833 
834  for( size_t j=jj; j<jend; ++j ) {
835  const SIMDType x1( set( x[j] ) );
836  xmm1 += A.load(i ,j) * x1;
837  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
838  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
839  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
840  }
841 
842  y.store( i , y.load(i ) + xmm1 );
843  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
844  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
845  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
846  }
847 
// Groups of 3 SIMD packs.
848  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
849  {
850  SIMDType xmm1, xmm2, xmm3;
851 
852  for( size_t j=jj; j<jend; ++j ) {
853  const SIMDType x1( set( x[j] ) );
854  xmm1 += A.load(i ,j) * x1;
855  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
856  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
857  }
858 
859  y.store( i , y.load(i ) + xmm1 );
860  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
861  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
862  }
863 
// Groups of 2 SIMD packs.
864  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
865  {
866  SIMDType xmm1, xmm2;
867 
868  for( size_t j=jj; j<jend; ++j ) {
869  const SIMDType x1( set( x[j] ) );
870  xmm1 += A.load(i ,j) * x1;
871  xmm2 += A.load(i+SIMDSIZE,j) * x1;
872  }
873 
874  y.store( i , y.load(i ) + xmm1 );
875  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2 );
876  }
877 
// Single SIMD packs.
878  for( ; i<ipos; i+=SIMDSIZE )
879  {
880  SIMDType xmm1;
881 
882  for( size_t j=jj; j<jend; ++j ) {
883  xmm1 += A.load(i,j) * set( x[j] );
884  }
885 
886  y.store( i, y.load(i) + xmm1 );
887  }
888 
// Scalar loop for the remaining rows of the block; the partial sum is added to y[i].
889  for( ; remainder && i<iend; ++i )
890  {
891  ElementType value{};
892 
893  for( size_t j=jj; j<jend; ++j ) {
894  value += A(i,j) * x[j];
895  }
896 
897  y[i] += value;
898  }
899  }
900  }
901  }
903  //**********************************************************************************************
904 
905  //**BLAS-based assignment to dense vectors (default)********************************************
// Default (non-BLAS) variant of the BLAS assignment kernel: forwards unchanged to
// selectLargeAssignKernel().
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// NOTE(review): listing line 922 (the declaration specifiers / return type) is absent here; it
// presumably carries the compile-time condition that distinguishes this overload from the
// BLAS-based overload of the same name - confirm against the original header.
919  template< typename VT1 // Type of the left-hand side target vector
920  , typename MT1 // Type of the left-hand side matrix operand
921  , typename VT2 > // Type of the right-hand side vector operand
923  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
924  {
925  selectLargeAssignKernel( y, A, x );
926  }
928  //**********************************************************************************************
929 
930  //**BLAS-based assignment to dense vectors******************************************************
931 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
932 
// BLAS-based assignment kernel (y = A * x); only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are enabled (see the enclosing #if).
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// Two paths are visible: in the first, x is assigned to y and y is then multiplied in place
// via trmv(), selecting CblasLower or CblasUpper depending on IsLower<MT1>; in the second,
// gemv() is called with the scalar factors ET(1) and ET(0).
// NOTE(review): listing line 948 (declaration specifiers / return type) and listing line 953
// (the 'if' condition opening the trmv() branch) are absent here. The branch presumably tests
// for a triangular matrix type - confirm against the original header.
945  template< typename VT1 // Type of the left-hand side target vector
946  , typename MT1 // Type of the left-hand side matrix operand
947  , typename VT2 > // Type of the right-hand side vector operand
949  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
950  {
951  using ET = ElementType_<VT1>;
952 
954  assign( y, x );
955  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
956  }
957  else {
958  gemv( y, A, x, ET(1), ET(0) );
959  }
960  }
962 #endif
963  //**********************************************************************************************
964 
965  //**Assignment to sparse vectors****************************************************************
// Assignment of the matrix/vector multiplication expression to a sparse vector.
//  lhs  The target left-hand side sparse vector.
//  rhs  The right-hand side multiplication expression to be assigned.
// The expression is first evaluated (via serial()) into a temporary of type ResultType, which
// is then assigned to the sparse target. The sizes of lhs and rhs are expected to match
// (checked by internal assertion only).
// NOTE(review): listing lines 981 and 983-985 are absent at the start of the body.
978  template< typename VT1 > // Type of the target sparse vector
979  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
980  {
982 
986 
987  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
988 
989  const ResultType tmp( serial( rhs ) );
990  assign( ~lhs, tmp );
991  }
993  //**********************************************************************************************
994 
995  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the matrix/vector multiplication expression to a dense vector.
//  lhs  The target left-hand side dense vector.
//  rhs  The right-hand side multiplication expression to be added.
// The sizes of lhs and rhs are expected to match (checked by internal assertion only).
// NOTE(review): listing line 1011 is absent at the start of the body, and the operand types LT
// and RT are declared in a part of the class not shown in this listing.
1008  template< typename VT1 > // Type of the target dense vector
1009  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1010  {
1012 
1013  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1014 
// A matrix without rows or without columns contributes nothing: lhs is left untouched.
1015  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1016  return;
1017  }
1018 
// Both operands are passed through serial() before being bound to the LT/RT objects.
1019  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1020  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1021 
1022  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1023  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1024  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1025  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1026 
// Dispatch to the kernel selection with the (possibly evaluated) operands.
1027  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1028  }
1030  //**********************************************************************************************
1031 
1032  //**Addition assignment to dense vectors (kernel selection)*************************************
1043  template< typename VT1 // Type of the left-hand side target vector
1044  , typename MT1 // Type of the left-hand side matrix operand
1045  , typename VT2 > // Type of the right-hand side vector operand
1046  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1047  {
1048  if( ( IsDiagonal<MT1>::value ) ||
1049  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1050  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1051  selectSmallAddAssignKernel( y, A, x );
1052  else
1053  selectBlasAddAssignKernel( y, A, x );
1054  }
1056  //**********************************************************************************************
1057 
1058  //**Default addition assignment to dense vectors************************************************
1072  template< typename VT1 // Type of the left-hand side target vector
1073  , typename MT1 // Type of the left-hand side matrix operand
1074  , typename VT2 > // Type of the right-hand side vector operand
1075  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1076  {
1077  const size_t M( A.rows() );
1078  const size_t N( A.columns() );
1079 
1080  for( size_t j=0UL; j<N; ++j )
1081  {
1083  {
1084  y[j] += A(j,j) * x[j];
1085  }
1086  else
1087  {
1088  const size_t ibegin( ( IsLower<MT1>::value )
1089  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1090  :( 0UL ) );
1091  const size_t iend( ( IsUpper<MT1>::value )
1092  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1093  :( M ) );
1094  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1095 
1096  const size_t inum( iend - ibegin );
1097  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1098 
1099  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1100  y[i ] += A(i ,j) * x[j];
1101  y[i+1UL] += A(i+1UL,j) * x[j];
1102  }
1103  if( ipos < iend ) {
1104  y[ipos] += A(ipos,j) * x[j];
1105  }
1106  }
1107  }
1108  }
1110  //**********************************************************************************************
1111 
1112  //**Default addition assignment to dense vectors (small matrices)*******************************
// Default addition assignment kernel for small matrices: forwards unchanged to
// selectDefaultAddAssignKernel().
//  y  The target left-hand side dense vector.
//  A  The left-hand side matrix operand.
//  x  The right-hand side vector operand.
// NOTE(review): listing line 1129 (the declaration specifiers / return type) is absent here. As
// a second overload with the same parameter list follows, the missing line presumably carries
// the compile-time condition that distinguishes the two overloads - confirm against the
// original header.
1126  template< typename VT1 // Type of the left-hand side target vector
1127  , typename MT1 // Type of the left-hand side matrix operand
1128  , typename VT2 > // Type of the right-hand side vector operand
1130  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1131  {
1132  selectDefaultAddAssignKernel( y, A, x );
1133  }
1135  //**********************************************************************************************
1136 
1137  //**Vectorized default addition assignment to dense vectors (small matrices)********************
      // Adds the matrix/vector product to the target vector (y += A*x) using explicit SIMD loads
      // from A and y, a broadcast of x[j] via set(), and SIMD stores back into y.
      // Rows are processed in groups of 8, 4, 3, 2 and finally 1 SIMD vector(s). For every group the
      // column range [jbegin,jend) is narrowed according to the IsUpper/IsLower/IsStrictly* traits
      // of MT1 (presumably to skip structurally zero elements). Rows from ipos up to M are handled
      // by the scalar loop at the end, which only runs if MT1 or VT1 is not padded.
      // NOTE(review): listing line 1154 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
1151  template< typename VT1 // Type of the left-hand side target vector
1152  , typename MT1 // Type of the left-hand side matrix operand
1153  , typename VT2 > // Type of the right-hand side vector operand
1155  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1156  {
      // A scalar tail loop is required as soon as one of the SIMD-accessed operands is unpadded.
1157  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1158
1159  const size_t M( A.rows() );
1160  const size_t N( A.columns() );
1161
      // With a remainder, round M down to a multiple of SIMDSIZE (verified by the assertion below).
1162  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
1163  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1164
1165  size_t i( 0UL );
1166
      // Eight SIMD vectors of rows per iteration; the accumulators start from the current y values.
1167  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1168  {
1169  const size_t jbegin( ( IsUpper<MT1>::value )
1170  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1171  :( 0UL ) );
1172  const size_t jend( ( IsLower<MT1>::value )
1173  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1174  :( N ) );
1175  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1176
1177  SIMDType xmm1( y.load(i ) );
1178  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1179  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1180  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1181  SIMDType xmm5( y.load(i+SIMDSIZE*4UL) );
1182  SIMDType xmm6( y.load(i+SIMDSIZE*5UL) );
1183  SIMDType xmm7( y.load(i+SIMDSIZE*6UL) );
1184  SIMDType xmm8( y.load(i+SIMDSIZE*7UL) );
1185
      // x[j] is broadcast once per column and reused for all eight row segments.
1186  for( size_t j=jbegin; j<jend; ++j ) {
1187  const SIMDType x1( set( x[j] ) );
1188  xmm1 += A.load(i ,j) * x1;
1189  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1190  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1191  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1192  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1193  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1194  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1195  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1196  }
1197
1198  y.store( i , xmm1 );
1199  y.store( i+SIMDSIZE , xmm2 );
1200  y.store( i+SIMDSIZE*2UL, xmm3 );
1201  y.store( i+SIMDSIZE*3UL, xmm4 );
1202  y.store( i+SIMDSIZE*4UL, xmm5 );
1203  y.store( i+SIMDSIZE*5UL, xmm6 );
1204  y.store( i+SIMDSIZE*6UL, xmm7 );
1205  y.store( i+SIMDSIZE*7UL, xmm8 );
1206  }
1207
      // Same scheme with four SIMD vectors of rows per iteration.
1208  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1209  {
1210  const size_t jbegin( ( IsUpper<MT1>::value )
1211  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1212  :( 0UL ) );
1213  const size_t jend( ( IsLower<MT1>::value )
1214  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1215  :( N ) );
1216  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1217
1218  SIMDType xmm1( y.load(i ) );
1219  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1220  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1221  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1222
1223  for( size_t j=jbegin; j<jend; ++j ) {
1224  const SIMDType x1( set( x[j] ) );
1225  xmm1 += A.load(i ,j) * x1;
1226  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1227  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1228  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1229  }
1230
1231  y.store( i , xmm1 );
1232  y.store( i+SIMDSIZE , xmm2 );
1233  y.store( i+SIMDSIZE*2UL, xmm3 );
1234  y.store( i+SIMDSIZE*3UL, xmm4 );
1235  }
1236
      // Three SIMD vectors of rows per iteration.
1237  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1238  {
1239  const size_t jbegin( ( IsUpper<MT1>::value )
1240  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1241  :( 0UL ) );
1242  const size_t jend( ( IsLower<MT1>::value )
1243  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1244  :( N ) );
1245  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1246
1247  SIMDType xmm1( y.load(i ) );
1248  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1249  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1250
1251  for( size_t j=jbegin; j<jend; ++j ) {
1252  const SIMDType x1( set( x[j] ) );
1253  xmm1 += A.load(i ,j) * x1;
1254  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1255  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1256  }
1257
1258  y.store( i , xmm1 );
1259  y.store( i+SIMDSIZE , xmm2 );
1260  y.store( i+SIMDSIZE*2UL, xmm3 );
1261  }
1262
      // Two SIMD vectors of rows per iteration.
1263  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1264  {
1265  const size_t jbegin( ( IsUpper<MT1>::value )
1266  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1267  :( 0UL ) );
1268  const size_t jend( ( IsLower<MT1>::value )
1269  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1270  :( N ) );
1271  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1272
1273  SIMDType xmm1( y.load(i ) );
1274  SIMDType xmm2( y.load(i+SIMDSIZE) );
1275
1276  for( size_t j=jbegin; j<jend; ++j ) {
1277  const SIMDType x1( set( x[j] ) );
1278  xmm1 += A.load(i ,j) * x1;
1279  xmm2 += A.load(i+SIMDSIZE,j) * x1;
1280  }
1281
1282  y.store( i , xmm1 );
1283  y.store( i+SIMDSIZE, xmm2 );
1284  }
1285
      // One SIMD vector of rows per iteration for whatever is left below ipos.
1286  for( ; i<ipos; i+=SIMDSIZE )
1287  {
1288  const size_t jbegin( ( IsUpper<MT1>::value )
1289  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1290  :( 0UL ) );
1291  const size_t jend( ( IsLower<MT1>::value )
1292  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1293  :( N ) );
1294  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1295
1296  SIMDType xmm1( y.load(i) );
1297
1298  for( size_t j=jbegin; j<jend; ++j ) {
1299  xmm1 += A.load(i,j) * set( x[j] );
1300  }
1301
1302  y.store( i, xmm1 );
1303  }
1304
      // Scalar tail for rows [ipos,M): accumulate the row's dot product, then add it to y[i].
1305  for( ; remainder && i<M; ++i )
1306  {
1307  const size_t jbegin( ( IsUpper<MT1>::value )
1308  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1309  :( 0UL ) );
1310  const size_t jend( ( IsLower<MT1>::value )
1311  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1312  :( N ) );
1313  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1314
1315  ElementType value{};
1316
1317  for( size_t j=jbegin; j<jend; ++j ) {
1318  value += A(i,j) * x[j];
1319  }
1320
1321  y[i] += value;
1322  }
1323  }
1325  //**********************************************************************************************
1326 
1327  //**Default addition assignment to dense vectors (large matrices)*******************************
      // Non-vectorized large-matrix kernel for y += A*x: simply forwards all three arguments to
      // selectDefaultAddAssignKernel().
      // NOTE(review): listing line 1344 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint that
      // distinguishes this overload from the vectorized one -- confirm against the original header.
1341  template< typename VT1 // Type of the left-hand side target vector
1342  , typename MT1 // Type of the left-hand side matrix operand
1343  , typename VT2 > // Type of the right-hand side vector operand
1345  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1346  {
1347  selectDefaultAddAssignKernel( y, A, x );
1348  }
1350  //**********************************************************************************************
1351 
1352  //**Vectorized default addition assignment to dense vectors (large matrices)********************
      // Adds the matrix/vector product to the target vector (y += A*x) with a blocked SIMD scheme.
      // The rows are traversed in blocks of iblock rows and the columns in blocks of jblock columns.
      // For every (ii,jj) tile the partial products are accumulated in local SIMD registers and then
      // added to the corresponding segment of y. The row range of a tile is narrowed according to
      // the IsUpper/IsLower/IsStrictly* traits of MT1.
      // NOTE(review): the accumulators are default-constructed; this relies on SIMDType's default
      // constructor producing zero -- confirm against the SIMD type definitions.
      // NOTE(review): listing line 1369 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
1366  template< typename VT1 // Type of the left-hand side target vector
1367  , typename MT1 // Type of the left-hand side matrix operand
1368  , typename VT2 > // Type of the right-hand side vector operand
1370  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1371  {
      // A scalar tail loop is required as soon as one of the SIMD-accessed operands is unpadded.
1372  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1373
1374  const size_t M( A.rows() );
1375  const size_t N( A.columns() );
1376
      // Row block size: 32768 bytes worth of elements; column block size: 8 or 4 depending on N.
1377  const size_t iblock( 32768UL / sizeof( ElementType ) );
1378  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1379
1380  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
1381
1382  for( size_t ii=0U; ii<M; ii+=iblock ) {
1383  for( size_t jj=0UL; jj<N; jj+=jblock )
1384  {
      // Column end of the tile and row end of the tile (clamped to the matrix dimensions).
1385  const size_t jend( min( jj+jblock, N ) );
1386  const size_t itmp( min( ii+iblock, M ) );
      // For upper matrices the row range is additionally limited by the tile's last column.
1387  const size_t iend( ( IsUpper<MT1>::value )
1388  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1389  :( itmp ) );
1390
      // With a remainder, round iend down to a multiple of SIMDSIZE (verified by the assertion).
1391  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1392  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1393
      // For lower matrices start at the SIMD-aligned row derived from jj, but never below ii.
1394  size_t i( ( IsLower<MT1>::value )
1395  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
1396  :( ii ) );
1397
      // Eight SIMD vectors of rows per iteration.
1398  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1399  {
1400  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1401
1402  for( size_t j=jj; j<jend; ++j ) {
1403  const SIMDType x1( set( x[j] ) );
1404  xmm1 += A.load(i ,j) * x1;
1405  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1406  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1407  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1408  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1409  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1410  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1411  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1412  }
1413
      // Add the tile's contribution to the current contents of y.
1414  y.store( i , y.load(i ) + xmm1 );
1415  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1416  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1417  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
1418  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5 );
1419  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6 );
1420  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7 );
1421  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8 );
1422  }
1423
      // Four SIMD vectors of rows per iteration.
1424  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1425  {
1426  SIMDType xmm1, xmm2, xmm3, xmm4;
1427
1428  for( size_t j=jj; j<jend; ++j ) {
1429  const SIMDType x1( set( x[j] ) );
1430  xmm1 += A.load(i ,j) * x1;
1431  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1432  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1433  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1434  }
1435
1436  y.store( i , y.load(i ) + xmm1 );
1437  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1438  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1439  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
1440  }
1441
      // Three SIMD vectors of rows per iteration.
1442  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1443  {
1444  SIMDType xmm1, xmm2, xmm3;
1445
1446  for( size_t j=jj; j<jend; ++j ) {
1447  const SIMDType x1( set( x[j] ) );
1448  xmm1 += A.load(i ,j) * x1;
1449  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1450  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1451  }
1452
1453  y.store( i , y.load(i ) + xmm1 );
1454  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1455  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1456  }
1457
      // Two SIMD vectors of rows per iteration.
1458  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1459  {
1460  SIMDType xmm1, xmm2;
1461
1462  for( size_t j=jj; j<jend; ++j ) {
1463  const SIMDType x1( set( x[j] ) );
1464  xmm1 += A.load(i ,j) * x1;
1465  xmm2 += A.load(i+SIMDSIZE,j) * x1;
1466  }
1467
1468  y.store( i , y.load(i ) + xmm1 );
1469  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2 );
1470  }
1471
      // One SIMD vector of rows per iteration for whatever is left below ipos.
1472  for( ; i<ipos; i+=SIMDSIZE )
1473  {
1474  SIMDType xmm1;
1475
1476  for( size_t j=jj; j<jend; ++j ) {
1477  xmm1 += A.load(i,j) * set( x[j] );
1478  }
1479
1480  y.store( i, y.load(i) + xmm1 );
1481  }
1482
      // Scalar tail for rows [ipos,iend) of this tile.
1483  for( ; remainder && i<iend; ++i )
1484  {
1485  ElementType value{};
1486
1487  for( size_t j=jj; j<jend; ++j ) {
1488  value += A(i,j) * x[j];
1489  }
1490
1491  y[i] += value;
1492  }
1493  }
1494  }
1495  }
1497  //**********************************************************************************************
1498 
1499  //**BLAS-based addition assignment to dense vectors (default)***********************************
      // Fallback of the BLAS-based addition assignment kernel: forwards all three arguments to
      // selectLargeAddAssignKernel().
      // NOTE(review): listing line 1516 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint that
      // distinguishes this overload from the BLAS one -- confirm against the original header.
1513  template< typename VT1 // Type of the left-hand side target vector
1514  , typename MT1 // Type of the left-hand side matrix operand
1515  , typename VT2 > // Type of the right-hand side vector operand
1517  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1518  {
1519  selectLargeAddAssignKernel( y, A, x );
1520  }
1522  //**********************************************************************************************
1523 
1524  //**BLAS-based addition assignment to dense vectors*********************************************
      // Only compiled if both BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are set.
1525 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
1526
      // Performs y += A*x by means of the trmv()/gemv() wrappers.
      // - Triangular MT1: x is copied into a temporary, trmv() is applied to that temporary with
      //   the lower/upper flag chosen via IsLower<MT1>, and the temporary is added to y.
      // - Otherwise: a single gemv() call with both scalar arguments set to ET(1).
      // NOTE(review): listing line 1542 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
1539  template< typename VT1 // Type of the left-hand side target vector
1540  , typename MT1 // Type of the left-hand side matrix operand
1541  , typename VT2 > // Type of the right-hand side vector operand
1543  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1544  {
1545  using ET = ElementType_<VT1>;
1546
1547  if( IsTriangular<MT1>::value ) {
      // trmv() works on the temporary, so the product is formed there and then added to y.
1548  ResultType_<VT1> tmp( serial( x ) );
1549  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1550  addAssign( y, tmp );
1551  }
1552  else {
      // NOTE(review): presumably the scalars are alpha=1 and beta=1 (y = A*x + y) -- confirm
      // against the gemv() wrapper's parameter order.
1553  gemv( y, A, x, ET(1), ET(1) );
1554  }
1555  }
1557 #endif
1558  //**********************************************************************************************
1559 
1560  //**Addition assignment to sparse vectors*******************************************************
1561  // No special implementation for the addition assignment to sparse vectors.
1562  //**********************************************************************************************
1563 
1564  //**Subtraction assignment to dense vectors*****************************************************
      // Subtracts the matrix/vector product represented by rhs from the dense vector lhs.
      // Returns immediately if the matrix has no rows or no columns. Otherwise both operands are
      // evaluated via serial() into the local objects A and x, and the work is delegated to
      // selectSubAssignKernel().
      // NOTE(review): listing line 1580 (first line of the function body) is absent from this
      // rendering -- confirm its content against the original header.
1577  template< typename VT1 > // Type of the target dense vector
1578  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1579  {
1581
1582  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1583
      // Nothing to subtract for an empty matrix.
1584  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1585  return;
1586  }
1587
1588  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1589  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1590
1591  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1592  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1593  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1594  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1595
1596  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1597  }
1599  //**********************************************************************************************
1600 
1601  //**Subtraction assignment to dense vectors (kernel selection)**********************************
      // Chooses the kernel used for y -= A*x.
      // The small-matrix kernel is selected if MT1 is diagonal, or if MT is a computation that is
      // not evaluated (evaluateMatrix is false), or if the number of matrix elements is below
      // TDMATDVECMULT_THRESHOLD. In all other cases the BLAS kernel is selected.
1612  template< typename VT1 // Type of the left-hand side target vector
1613  , typename MT1 // Type of the left-hand side matrix operand
1614  , typename VT2 > // Type of the right-hand side vector operand
1615  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1616  {
      // Note that the second condition tests the class template parameter MT, not MT1.
1617  if( ( IsDiagonal<MT1>::value ) ||
1618  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1619  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1620  selectSmallSubAssignKernel( y, A, x );
1621  else
1622  selectBlasSubAssignKernel( y, A, x );
1623  }
1625  //**********************************************************************************************
1626 
1627  //**Default subtraction assignment to dense vectors*********************************************
      // Scalar kernel for y -= A*x. The matrix is traversed column by column; for column j the
      // contribution A(i,j)*x[j] is subtracted from y[i] for all rows i in [ibegin,iend), where the
      // row range is narrowed according to the IsLower/IsUpper/IsStrictly* traits of MT1.
      // NOTE(review): listing line 1651 is absent from this rendering. The visible 'else' at
      // listing line 1655 has no visible 'if'; presumably the missing line is the condition that
      // selects the diagonal-only update in the first branch -- confirm against the original header.
1641  template< typename VT1 // Type of the left-hand side target vector
1642  , typename MT1 // Type of the left-hand side matrix operand
1643  , typename VT2 > // Type of the right-hand side vector operand
1644  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1645  {
1646  const size_t M( A.rows() );
1647  const size_t N( A.columns() );
1648
1649  for( size_t j=0UL; j<N; ++j )
1650  {
1652  {
      // Only the diagonal element of column j contributes in this branch.
1653  y[j] -= A(j,j) * x[j];
1654  }
1655  else
1656  {
1657  const size_t ibegin( ( IsLower<MT1>::value )
1658  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1659  :( 0UL ) );
1660  const size_t iend( ( IsUpper<MT1>::value )
1661  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1662  :( M ) );
1663  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1664
      // The row loop is unrolled by two: ipos marks the end of the even-sized part of the range.
1665  const size_t inum( iend - ibegin );
1666  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1667
1668  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1669  y[i ] -= A(i ,j) * x[j];
1670  y[i+1UL] -= A(i+1UL,j) * x[j];
1671  }
      // Handle the single leftover row of an odd-sized range.
1672  if( ipos < iend ) {
1673  y[ipos] -= A(ipos,j) * x[j];
1674  }
1675  }
1676  }
1677  }
1679  //**********************************************************************************************
1680 
1681  //**Default subtraction assignment to dense vectors (small matrices)****************************
      // Non-vectorized small-matrix kernel for y -= A*x: simply forwards all three arguments to
      // selectDefaultSubAssignKernel().
      // NOTE(review): listing line 1698 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint that
      // distinguishes this overload from the vectorized one -- confirm against the original header.
1695  template< typename VT1 // Type of the left-hand side target vector
1696  , typename MT1 // Type of the left-hand side matrix operand
1697  , typename VT2 > // Type of the right-hand side vector operand
1699  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1700  {
1701  selectDefaultSubAssignKernel( y, A, x );
1702  }
1704  //**********************************************************************************************
1705 
1706  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
      // Subtracts the matrix/vector product from the target vector (y -= A*x) using explicit SIMD
      // loads from A and y, a broadcast of x[j] via set(), and SIMD stores back into y.
      // Rows are processed in groups of 8, 4, 3, 2 and finally 1 SIMD vector(s). For every group the
      // column range [jbegin,jend) is narrowed according to the IsUpper/IsLower/IsStrictly* traits
      // of MT1 (presumably to skip structurally zero elements). Rows from ipos up to M are handled
      // by the scalar loop at the end, which only runs if MT1 or VT1 is not padded.
      // NOTE(review): listing line 1724 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
1721  template< typename VT1 // Type of the left-hand side target vector
1722  , typename MT1 // Type of the left-hand side matrix operand
1723  , typename VT2 > // Type of the right-hand side vector operand
1725  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1726  {
      // A scalar tail loop is required as soon as one of the SIMD-accessed operands is unpadded.
1727  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1728
1729  const size_t M( A.rows() );
1730  const size_t N( A.columns() );
1731
      // With a remainder, round M down to a multiple of SIMDSIZE (verified by the assertion below).
1732  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
1733  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1734
1735  size_t i( 0UL );
1736
      // Eight SIMD vectors of rows per iteration; the accumulators start from the current y values.
1737  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1738  {
1739  const size_t jbegin( ( IsUpper<MT1>::value )
1740  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1741  :( 0UL ) );
1742  const size_t jend( ( IsLower<MT1>::value )
1743  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1744  :( N ) );
1745  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1746
1747  SIMDType xmm1( y.load(i ) );
1748  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1749  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1750  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1751  SIMDType xmm5( y.load(i+SIMDSIZE*4UL) );
1752  SIMDType xmm6( y.load(i+SIMDSIZE*5UL) );
1753  SIMDType xmm7( y.load(i+SIMDSIZE*6UL) );
1754  SIMDType xmm8( y.load(i+SIMDSIZE*7UL) );
1755
      // x[j] is broadcast once per column and reused for all eight row segments.
1756  for( size_t j=jbegin; j<jend; ++j ) {
1757  const SIMDType x1( set( x[j] ) );
1758  xmm1 -= A.load(i ,j) * x1;
1759  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1760  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1761  xmm4 -= A.load(i+SIMDSIZE*3UL,j) * x1;
1762  xmm5 -= A.load(i+SIMDSIZE*4UL,j) * x1;
1763  xmm6 -= A.load(i+SIMDSIZE*5UL,j) * x1;
1764  xmm7 -= A.load(i+SIMDSIZE*6UL,j) * x1;
1765  xmm8 -= A.load(i+SIMDSIZE*7UL,j) * x1;
1766  }
1767
1768  y.store( i , xmm1 );
1769  y.store( i+SIMDSIZE , xmm2 );
1770  y.store( i+SIMDSIZE*2UL, xmm3 );
1771  y.store( i+SIMDSIZE*3UL, xmm4 );
1772  y.store( i+SIMDSIZE*4UL, xmm5 );
1773  y.store( i+SIMDSIZE*5UL, xmm6 );
1774  y.store( i+SIMDSIZE*6UL, xmm7 );
1775  y.store( i+SIMDSIZE*7UL, xmm8 );
1776  }
1777
      // Same scheme with four SIMD vectors of rows per iteration.
1778  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1779  {
1780  const size_t jbegin( ( IsUpper<MT1>::value )
1781  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1782  :( 0UL ) );
1783  const size_t jend( ( IsLower<MT1>::value )
1784  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1785  :( N ) );
1786  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1787
1788  SIMDType xmm1( y.load(i ) );
1789  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1790  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1791  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1792
1793  for( size_t j=jbegin; j<jend; ++j ) {
1794  const SIMDType x1( set( x[j] ) );
1795  xmm1 -= A.load(i ,j) * x1;
1796  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1797  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1798  xmm4 -= A.load(i+SIMDSIZE*3UL,j) * x1;
1799  }
1800
1801  y.store( i , xmm1 );
1802  y.store( i+SIMDSIZE , xmm2 );
1803  y.store( i+SIMDSIZE*2UL, xmm3 );
1804  y.store( i+SIMDSIZE*3UL, xmm4 );
1805  }
1806
      // Three SIMD vectors of rows per iteration.
1807  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1808  {
1809  const size_t jbegin( ( IsUpper<MT1>::value )
1810  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1811  :( 0UL ) );
1812  const size_t jend( ( IsLower<MT1>::value )
1813  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1814  :( N ) );
1815  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1816
1817  SIMDType xmm1( y.load(i ) );
1818  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1819  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1820
1821  for( size_t j=jbegin; j<jend; ++j ) {
1822  const SIMDType x1( set( x[j] ) );
1823  xmm1 -= A.load(i ,j) * x1;
1824  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1825  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1826  }
1827
1828  y.store( i , xmm1 );
1829  y.store( i+SIMDSIZE , xmm2 );
1830  y.store( i+SIMDSIZE*2UL, xmm3 );
1831  }
1832
      // Two SIMD vectors of rows per iteration.
1833  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1834  {
1835  const size_t jbegin( ( IsUpper<MT1>::value )
1836  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1837  :( 0UL ) );
1838  const size_t jend( ( IsLower<MT1>::value )
1839  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1840  :( N ) );
1841  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1842
1843  SIMDType xmm1( y.load(i ) );
1844  SIMDType xmm2( y.load(i+SIMDSIZE) );
1845
1846  for( size_t j=jbegin; j<jend; ++j ) {
1847  const SIMDType x1( set( x[j] ) );
1848  xmm1 -= A.load(i ,j) * x1;
1849  xmm2 -= A.load(i+SIMDSIZE,j) * x1;
1850  }
1851
1852  y.store( i , xmm1 );
1853  y.store( i+SIMDSIZE, xmm2 );
1854  }
1855
      // One SIMD vector of rows per iteration for whatever is left below ipos.
1856  for( ; i<ipos; i+=SIMDSIZE )
1857  {
1858  const size_t jbegin( ( IsUpper<MT1>::value )
1859  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1860  :( 0UL ) );
1861  const size_t jend( ( IsLower<MT1>::value )
1862  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1863  :( N ) );
1864  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1865
1866  SIMDType xmm1( y.load(i) );
1867
1868  for( size_t j=jbegin; j<jend; ++j ) {
1869  xmm1 -= A.load(i,j) * set( x[j] );
1870  }
1871
1872  y.store( i, xmm1 );
1873  }
1874
      // Scalar tail for rows [ipos,M): accumulate the row's dot product, then subtract it from y[i].
1875  for( ; remainder && i<M; ++i )
1876  {
1877  const size_t jbegin( ( IsUpper<MT1>::value )
1878  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1879  :( 0UL ) );
1880  const size_t jend( ( IsLower<MT1>::value )
1881  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1882  :( N ) );
1883  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1884
1885  ElementType value{};
1886
1887  for( size_t j=jbegin; j<jend; ++j ) {
1888  value += A(i,j) * x[j];
1889  }
1890
1891  y[i] -= value;
1892  }
1893  }
1895  //**********************************************************************************************
1896 
1897  //**Default subtraction assignment to dense vectors (large matrices)****************************
      // Non-vectorized large-matrix kernel for y -= A*x: simply forwards all three arguments to
      // selectDefaultSubAssignKernel().
      // NOTE(review): listing line 1914 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint that
      // distinguishes this overload from the vectorized one -- confirm against the original header.
1911  template< typename VT1 // Type of the left-hand side target vector
1912  , typename MT1 // Type of the left-hand side matrix operand
1913  , typename VT2 > // Type of the right-hand side vector operand
1915  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1916  {
1917  selectDefaultSubAssignKernel( y, A, x );
1918  }
1920  //**********************************************************************************************
1921 
1922  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
      // Subtracts the matrix/vector product from the target vector (y -= A*x) with a blocked SIMD
      // scheme. The rows are traversed in blocks of iblock rows and the columns in blocks of jblock
      // columns. For every (ii,jj) tile the partial products are accumulated (with +=) in local
      // SIMD registers and then subtracted from the corresponding segment of y. The row range of a
      // tile is narrowed according to the IsUpper/IsLower/IsStrictly* traits of MT1.
      // NOTE(review): the accumulators are default-constructed; this relies on SIMDType's default
      // constructor producing zero -- confirm against the SIMD type definitions.
      // NOTE(review): listing line 1940 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
1937  template< typename VT1 // Type of the left-hand side target vector
1938  , typename MT1 // Type of the left-hand side matrix operand
1939  , typename VT2 > // Type of the right-hand side vector operand
1941  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1942  {
      // A scalar tail loop is required as soon as one of the SIMD-accessed operands is unpadded.
1943  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1944
1945  const size_t M( A.rows() );
1946  const size_t N( A.columns() );
1947
      // Row block size: 32768 bytes worth of elements; column block size: 8 or 4 depending on N.
1948  const size_t iblock( 32768UL / sizeof( ElementType ) );
1949  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1950
1951  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
1952
1953  for( size_t ii=0U; ii<M; ii+=iblock ) {
1954  for( size_t jj=0UL; jj<N; jj+=jblock )
1955  {
      // Column end of the tile and row end of the tile (clamped to the matrix dimensions).
1956  const size_t jend( min( jj+jblock, N ) );
1957  const size_t itmp( min( ii+iblock, M ) );
      // For upper matrices the row range is additionally limited by the tile's last column.
1958  const size_t iend( ( IsUpper<MT1>::value )
1959  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1960  :( itmp ) );
1961
      // With a remainder, round iend down to a multiple of SIMDSIZE (verified by the assertion).
1962  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1963  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1964
      // For lower matrices start at the SIMD-aligned row derived from jj, but never below ii.
1965  size_t i( ( IsLower<MT1>::value )
1966  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
1967  :( ii ) );
1968
      // Eight SIMD vectors of rows per iteration.
1969  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1970  {
1971  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1972
1973  for( size_t j=jj; j<jend; ++j ) {
1974  const SIMDType x1( set( x[j] ) );
1975  xmm1 += A.load(i ,j) * x1;
1976  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1977  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1978  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1979  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1980  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1981  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1982  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1983  }
1984
      // Subtract the tile's contribution from the current contents of y.
1985  y.store( i , y.load(i ) - xmm1 );
1986  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
1987  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
1988  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4 );
1989  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5 );
1990  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6 );
1991  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7 );
1992  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8 );
1993  }
1994
      // Four SIMD vectors of rows per iteration.
1995  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1996  {
1997  SIMDType xmm1, xmm2, xmm3, xmm4;
1998
1999  for( size_t j=jj; j<jend; ++j ) {
2000  const SIMDType x1( set( x[j] ) );
2001  xmm1 += A.load(i ,j) * x1;
2002  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2003  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2004  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2005  }
2006
2007  y.store( i , y.load(i ) - xmm1 );
2008  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
2009  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
2010  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4 );
2011  }
2012
      // Three SIMD vectors of rows per iteration.
2013  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
2014  {
2015  SIMDType xmm1, xmm2, xmm3;
2016
2017  for( size_t j=jj; j<jend; ++j ) {
2018  const SIMDType x1( set( x[j] ) );
2019  xmm1 += A.load(i ,j) * x1;
2020  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2021  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2022  }
2023
2024  y.store( i , y.load(i ) - xmm1 );
2025  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
2026  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
2027  }
2028
      // Two SIMD vectors of rows per iteration.
2029  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
2030  {
2031  SIMDType xmm1, xmm2;
2032
2033  for( size_t j=jj; j<jend; ++j ) {
2034  const SIMDType x1( set( x[j] ) );
2035  xmm1 += A.load(i ,j) * x1;
2036  xmm2 += A.load(i+SIMDSIZE,j) * x1;
2037  }
2038
2039  y.store( i , y.load(i ) - xmm1 );
2040  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2 );
2041  }
2042
      // One SIMD vector of rows per iteration for whatever is left below ipos.
2043  for( ; i<ipos; i+=SIMDSIZE )
2044  {
2045  SIMDType xmm1;
2046
2047  for( size_t j=jj; j<jend; ++j ) {
2048  xmm1 += A.load(i,j) * set( x[j] );
2049  }
2050
2051  y.store( i, y.load(i) - xmm1 );
2052  }
2053
      // Scalar tail for rows [ipos,iend) of this tile.
2054  for( ; remainder && i<iend; ++i )
2055  {
2056  ElementType value{};
2057
2058  for( size_t j=jj; j<jend; ++j ) {
2059  value += A(i,j) * x[j];
2060  }
2061
2062  y[i] -= value;
2063  }
2064  }
2065  }
2066  }
2068  //**********************************************************************************************
2069 
2070  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Fallback of the BLAS-based subtraction assignment kernel: forwards all three arguments to
      // selectLargeSubAssignKernel().
      // NOTE(review): listing line 2087 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint that
      // distinguishes this overload from the BLAS one -- confirm against the original header.
2084  template< typename VT1 // Type of the left-hand side target vector
2085  , typename MT1 // Type of the left-hand side matrix operand
2086  , typename VT2 > // Type of the right-hand side vector operand
2088  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2089  {
2090  selectLargeSubAssignKernel( y, A, x );
2091  }
2093  //**********************************************************************************************
2094 
2095  //**BLAS-based subtraction assignment to dense vectors******************************************
      // Only compiled if both BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are set.
2096 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
2097
      // Performs y -= A*x by means of the trmv()/gemv() wrappers.
      // - Triangular MT1: x is copied into a temporary, trmv() is applied to that temporary with
      //   the lower/upper flag chosen via IsLower<MT1>, and the temporary is subtracted from y.
      // - Otherwise: a single gemv() call with the scalar arguments ET(-1) and ET(1).
      // NOTE(review): listing line 2113 (between the template header and the function name) is
      // absent from this rendering; presumably it carries the return type/SFINAE constraint --
      // confirm against the original header.
2110  template< typename VT1 // Type of the left-hand side target vector
2111  , typename MT1 // Type of the left-hand side matrix operand
2112  , typename VT2 > // Type of the right-hand side vector operand
2114  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2115  {
2116  using ET = ElementType_<VT1>;
2117
2118  if( IsTriangular<MT1>::value ) {
      // trmv() works on the temporary, so the product is formed there and then subtracted from y.
2119  ResultType_<VT1> tmp( serial( x ) );
2120  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2121  subAssign( y, tmp );
2122  }
2123  else {
      // NOTE(review): presumably the scalars are alpha=-1 and beta=1 (y = -A*x + y) -- confirm
      // against the gemv() wrapper's parameter order.
2124  gemv( y, A, x, ET(-1), ET(1) );
2125  }
2126  }
2128 #endif
2129  //**********************************************************************************************
2130 
2131  //**Subtraction assignment to sparse vectors****************************************************
2132  // No special implementation for the subtraction assignment to sparse vectors.
2133  //**********************************************************************************************
2134 
2135  //**Multiplication assignment to dense vectors**************************************************
      // Multiplication assignment of the matrix/vector product rhs to the dense vector lhs.
      // The expression is first evaluated via serial() into a temporary of type ResultType; the
      // temporary is then passed on to multAssign() together with the target vector.
      // NOTE(review): listing lines 2151 and 2153-2155 (start of the function body) are absent
      // from this rendering -- confirm their content against the original header.
2148  template< typename VT1 > // Type of the target dense vector
2149  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2150  {
2152
2156
2157  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2158
2159  const ResultType tmp( serial( rhs ) );
2160  multAssign( ~lhs, tmp );
2161  }
2163  //**********************************************************************************************
2164 
2165  //**Multiplication assignment to sparse vectors*************************************************
2166  // No special implementation for the multiplication assignment to sparse vectors.
2167  //**********************************************************************************************
2168 
2169  //**Division assignment to dense vectors********************************************************
      // Division assignment of the matrix/vector product rhs to the dense vector lhs.
      // The expression is first evaluated via serial() into a temporary of type ResultType; the
      // temporary is then passed on to divAssign() together with the target vector.
      // NOTE(review): listing lines 2185 and 2187-2189 (start of the function body) are absent
      // from this rendering -- confirm their content against the original header.
2182  template< typename VT1 > // Type of the target dense vector
2183  friend inline void divAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2184  {
2186
2190
2191  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2192
2193  const ResultType tmp( serial( rhs ) );
2194  divAssign( ~lhs, tmp );
2195  }
2197  //**********************************************************************************************
2198 
2199  //**Division assignment to sparse vectors*******************************************************
2200  // No special implementation for the division assignment to sparse vectors.
2201  //**********************************************************************************************
2202 
2203  //**SMP assignment to dense vectors*************************************************************
      // SMP assignment of the matrix/vector product to a dense vector; only enabled when
      // UseSMPAssign<VT1> holds. Returns immediately for a matrix without rows, resets the target
      // for a matrix without columns, and otherwise evaluates both operands (without serial())
      // and passes the product A * x to smpAssign().
      // NOTE(review): listing line 2220 (the function name and parameter list) and line 2222 are
      // absent from this rendering; the body uses 'lhs' and 'rhs' -- confirm the exact signature
      // against the original header.
2218  template< typename VT1 > // Type of the target dense vector
2219  friend inline EnableIf_< UseSMPAssign<VT1> >
2221  {
2223
2224  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2225
      // No rows: nothing to assign. No columns: the result is reset instead of computed.
2226  if( rhs.mat_.rows() == 0UL ) {
2227  return;
2228  }
2229  else if( rhs.mat_.columns() == 0UL ) {
2230  reset( ~lhs );
2231  return;
2232  }
2233
2234  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2235  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2236
2237  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2238  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2239  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2240  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2241
2242  smpAssign( ~lhs, A * x );
2243  }
2245  //**********************************************************************************************
2246 
2247  //**SMP assignment to sparse vectors************************************************************
      // SMP assignment of the matrix/vector product to a sparse vector; only enabled when
      // UseSMPAssign<VT1> holds. The expression is evaluated into a temporary of type ResultType,
      // which is then passed on to smpAssign() together with the target vector.
      // NOTE(review): listing line 2264 (the function name and parameter list) and lines 2266 and
      // 2268-2270 are absent from this rendering; the body uses 'lhs' and 'rhs' -- confirm the
      // exact signature against the original header.
2262  template< typename VT1 > // Type of the target sparse vector
2263  friend inline EnableIf_< UseSMPAssign<VT1> >
2265  {
2267
2271
2272  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2273
2274  const ResultType tmp( rhs );
2275  smpAssign( ~lhs, tmp );
2276  }
2278  //**********************************************************************************************
2279 
2280  //**SMP addition assignment to dense vectors****************************************************
      // SMP addition assignment of the matrix/vector product to a dense vector; only enabled when
      // UseSMPAssign<VT1> holds. Returns immediately if the matrix has no rows or no columns;
      // otherwise both operands are evaluated (without serial()) and the product A * x is passed
      // to smpAddAssign().
      // NOTE(review): listing line 2297 (the function name and parameter list) and line 2299 are
      // absent from this rendering; the body uses 'lhs' and 'rhs' -- confirm the exact signature
      // against the original header.
2295  template< typename VT1 > // Type of the target dense vector
2296  friend inline EnableIf_< UseSMPAssign<VT1> >
2298  {
2300
2301  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2302
      // Nothing to add for an empty matrix.
2303  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2304  return;
2305  }
2306
2307  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2308  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2309
2310  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2311  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2312  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2313  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2314
2315  smpAddAssign( ~lhs, A * x );
2316  }
2318  //**********************************************************************************************
2319 
2320  //**SMP addition assignment to sparse vectors***************************************************
2321  // No special implementation for the SMP addition assignment to sparse vectors.
2322  //**********************************************************************************************
2323 
2324  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the matrix/vector product to a dense target vector.
// If the matrix operand has no rows or no columns the function returns without
// touching the target. Otherwise both operands are bound to LT/RT objects and
// the product `A * x` is forwarded to smpSubAssign().
// NOTE(review): the listing omits the line carrying the function name and the
// parameter list (between listing lines 2340 and 2342); the body uses the
// parameters `lhs` and `rhs` - confirm against the original header.
2339  template< typename VT1 > // Type of the target dense vector
2340  friend inline EnableIf_< UseSMPAssign<VT1> >
2342  {
2344 
2345  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2346 
      // An empty product contributes nothing to the target.
2347  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2348  return;
2349  }
2350 
2351  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2352  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2353 
2354  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2355  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2356  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2357  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2358 
2359  smpSubAssign( ~lhs, A * x );
2360  }
2362  //**********************************************************************************************
2363 
2364  //**SMP subtraction assignment to sparse vectors************************************************
2365  // No special implementation for the SMP subtraction assignment to sparse vectors.
2366  //**********************************************************************************************
2367 
2368  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the matrix/vector product to a dense target vector.
// The expression is evaluated into a temporary of type ResultType, which is
// then passed to smpMultAssign() together with the target.
// NOTE(review): the listing omits the line carrying the function name and the
// parameter list (between listing lines 2384 and 2386); the body uses the
// parameters `lhs` and `rhs` - confirm against the original header.
2383  template< typename VT1 > // Type of the target dense vector
2384  friend inline EnableIf_< UseSMPAssign<VT1> >
2386  {
2388 
2392 
2393  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2394 
      // Materialise the product first; the update then works on a plain vector.
2395  const ResultType tmp( rhs );
2396  smpMultAssign( ~lhs, tmp );
2397  }
2399  //**********************************************************************************************
2400 
2401  //**SMP multiplication assignment to sparse vectors*********************************************
2402  // No special implementation for the SMP multiplication assignment to sparse vectors.
2403  //**********************************************************************************************
2404 
2405  //**SMP division assignment to dense vectors****************************************************
// SMP division assignment of the matrix/vector product to a dense target vector.
// The expression is evaluated into a temporary of type ResultType, which is
// then passed to smpDivAssign() together with the target.
// NOTE(review): the listing omits the line carrying the function name and the
// parameter list (between listing lines 2421 and 2423); the body uses the
// parameters `lhs` and `rhs` - confirm against the original header.
2420  template< typename VT1 > // Type of the target dense vector
2421  friend inline EnableIf_< UseSMPAssign<VT1> >
2423  {
2425 
2429 
2430  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2431 
      // Materialise the product first; the update then works on a plain vector.
2432  const ResultType tmp( rhs );
2433  smpDivAssign( ~lhs, tmp );
2434  }
2436  //**********************************************************************************************
2437 
2438  //**SMP division assignment to sparse vectors***************************************************
2439  // No special implementation for the SMP division assignment to sparse vectors.
2440  //**********************************************************************************************
2441 
2442  //**Compile time checks*************************************************************************
2450  //**********************************************************************************************
2451 };
2452 //*************************************************************************************************
2453 
2454 
2455 
2456 
2457 //=================================================================================================
2458 //
2459 // DVECSCALARMULTEXPR SPECIALIZATION
2460 //
2461 //=================================================================================================
2462 
2463 //*************************************************************************************************
2472 template< typename MT // Type of the left-hand side dense matrix
2473  , typename VT // Type of the right-hand side dense vector
2474  , typename ST > // Type of the side scalar value
2475 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
2476  : public VecScalarMultExpr< DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false > >
2477  , private Computation
2478 {
2479  private:
2480  //**Type definitions****************************************************************************
2481  using MVM = TDMatDVecMultExpr<MT,VT>; // Type of the wrapped matrix/vector multiplication expression
2482  using RES = ResultType_<MVM>; // Result type of the matrix/vector multiplication expression
2483  using MRT = ResultType_<MT>; // Result type of the matrix operand type MT
2484  using VRT = ResultType_<VT>; // Result type of the vector operand type VT
2485  using MET = ElementType_<MRT>; // Element type of the matrix result type
2486  using VET = ElementType_<VRT>; // Element type of the vector result type
2487  using MCT = CompositeType_<MT>; // Composite type of the matrix operand type MT
2488  using VCT = CompositeType_<VT>; // Composite type of the vector operand type VT
2489  //**********************************************************************************************
2490 
2491  //**********************************************************************************************
// Compile-time switch for evaluating the matrix operand. The visible part of
// the condition requires MT to be a computation and the matrix and vector
// element types to be identical.
// NOTE(review): the listing omits the continuation line(s) of this enum
// (listing line 2494), so the full condition cannot be read from here.
2493  enum : bool { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2495  //**********************************************************************************************
2496 
2497  //**********************************************************************************************
      // Compile-time switch for evaluating the vector operand: set when VT is a
      // computation or when VT requires an intermediate evaluation.
2499  enum : bool { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2500  //**********************************************************************************************
2501 
2502  //**********************************************************************************************
2504 
// Helper trait that selects the SMP assignment overloads: its value is true as
// soon as either operand has to be evaluated (evaluateMatrix || evaluateVector).
// The template parameter T1 does not take part in the visible condition.
2507  template< typename T1 >
2508  struct UseSMPAssign {
2509  enum : bool { value = ( evaluateMatrix || evaluateVector ) };
2510  };
2511  //**********************************************************************************************
2512 
2513  //**********************************************************************************************
2515 
// Helper trait deciding whether the BLAS kernel may be used for the four given
// types. The visible parts of the condition require T1, T2 and T3 to be SIMD
// enabled and T1 and T3 to share the same element type.
// NOTE(review): the listing omits several lines of this struct (the `enum`
// opening and further conditions, listing lines 2519-2523, 2525-2528 and
// 2530), so the complete condition and the role of T4 cannot be read from here.
2517  template< typename T1, typename T2, typename T3, typename T4 >
2518  struct UseBlasKernel {
2524  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
2529  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
2531  };
2532  //**********************************************************************************************
2533 
2534  //**********************************************************************************************
2536 
// Helper trait deciding whether the vectorized default kernels may be used for
// the four given types. The visible parts of the condition require
// useOptimizedKernels, SIMD-enabled T1/T2/T3, and SIMD addition and
// multiplication support for the element types of T2 and T3.
// NOTE(review): the listing omits some lines of the condition (listing lines
// 2542 and 2544-2546), including the expression that `, T4 >::value` belongs to.
2539  template< typename T1, typename T2, typename T3, typename T4 >
2540  struct UseVectorizedDefaultKernel {
2541  enum : bool { value = useOptimizedKernels &&
2543  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
2547  , T4 >::value &&
2548  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
2549  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
2550  };
2551  //**********************************************************************************************
2552 
2553  public:
2554  //**Type definitions****************************************************************************
// Public type aliases of the scaled matrix/vector product.
// NOTE(review): the listing omits the aliases between listing lines 2556 and
// 2560 (ElementType, used below, is among the names not shown here).
2556  using ResultType = MultTrait_<RES,ST>; // Result type: product result type RES multiplied by the scalar type ST
2560  using ReturnType = const ElementType; // Type returned by the element access functions
2561  using CompositeType = const ResultType; // Data type for composite expression templates
2562 
2564  using LeftOperand = const TDMatDVecMultExpr<MT,VT>; // Left-hand side operand: the wrapped matrix/vector product
2565 
2567  using RightOperand = ST; // Right-hand side operand: the scalar
2568 
2571 
2574  //**********************************************************************************************
2575 
2576  //**Compilation flags***************************************************************************
// Compilation flag for SIMD optimization. The visible part of the condition
// requires a non-diagonal matrix type and SIMD-enabled matrix and vector types.
// NOTE(review): the listing omits the remaining lines of this enum (listing
// lines 2580-2582), so the complete condition cannot be read from here.
2578  enum : bool { simdEnabled = !IsDiagonal<MT>::value &&
2579  MT::simdEnabled && VT::simdEnabled &&
2583 
      // Compilation flag for SMP assignments: the expression itself is SMP
      // assignable only if neither operand needs evaluation and both operand
      // types are SMP assignable.
2585  enum : bool { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2586  !evaluateVector && VT::smpAssignable };
2587  //**********************************************************************************************
2588 
2589  //**SIMD properties*****************************************************************************
// Size taken from SIMDTrait<ElementType>; used as the step width of the
// vectorized kernels in this class.
2591  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
2592  //**********************************************************************************************
2593 
2594  //**Constructor*********************************************************************************
// Constructs the scaled expression from a matrix/vector product and a scalar.
//   vector: the matrix/vector multiplication expression to be scaled
//   scalar: the scalar factor
// Both arguments are only stored in the members; no computation happens here.
2600  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2601  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2602  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2603  {}
2604  //**********************************************************************************************
2605 
2606  //**Subscript operator**************************************************************************
// Unchecked element access: returns element `index` of the wrapped product
// multiplied by the scalar. The index is only verified by an internal
// assertion (index < vector_.size()).
2612  inline ReturnType operator[]( size_t index ) const {
2613  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2614  return vector_[index] * scalar_;
2615  }
2616  //**********************************************************************************************
2617 
2618  //**At function*********************************************************************************
// Checked element access: reports an out-of-range error through
// BLAZE_THROW_OUT_OF_RANGE when `index` is not smaller than the vector size,
// otherwise forwards to the subscript operator.
2625  inline ReturnType at( size_t index ) const {
2626  if( index >= vector_.size() ) {
2627  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
2628  }
2629  return (*this)[index];
2630  }
2631  //**********************************************************************************************
2632 
2633  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped
// matrix/vector product.
2638  inline size_t size() const {
2639  return vector_.size();
2640  }
2641  //**********************************************************************************************
2642 
2643  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped matrix/vector product.
2648  inline LeftOperand leftOperand() const {
2649  return vector_;
2650  }
2651  //**********************************************************************************************
2652 
2653  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
2658  inline RightOperand rightOperand() const {
2659  return scalar_;
2660  }
2661  //**********************************************************************************************
2662 
2663  //**********************************************************************************************
// Reports whether the expression can alias with the given address; the
// decision is delegated entirely to the wrapped matrix/vector product.
2669  template< typename T >
2670  inline bool canAlias( const T* alias ) const {
2671  return vector_.canAlias( alias );
2672  }
2673  //**********************************************************************************************
2674 
2675  //**********************************************************************************************
// Reports whether the expression is aliased with the given address; the
// decision is delegated entirely to the wrapped matrix/vector product.
2681  template< typename T >
2682  inline bool isAliased( const T* alias ) const {
2683  return vector_.isAliased( alias );
2684  }
2685  //**********************************************************************************************
2686 
2687  //**********************************************************************************************
// Reports whether the operands are properly aligned in memory; the answer is
// taken from the wrapped matrix/vector product.
2692  inline bool isAligned() const {
2693  return vector_.isAligned();
2694  }
2695  //**********************************************************************************************
2696 
2697  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments. From the
// visible code: the vector size must exceed SMP_TDMATDVECMULT_THRESHOLD, and
// at least one of the alternatives in the first parenthesis must hold (no BLAS
// mode, an unevaluated computation as matrix operand, or a matrix with fewer
// than TDMATDVECMULT_THRESHOLD elements).
// NOTE(review): the listing omits two lines of the condition (listing lines
// 2705-2706), so further alternatives may exist that are not shown here.
2702  inline bool canSMPAssign() const noexcept {
2703  LeftOperand_<MVM> A( vector_.leftOperand() );
2704  return ( !BLAZE_BLAS_MODE ||
2707  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2708  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2709  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
2710  }
2711  //**********************************************************************************************
2712 
2713  private:
2714  //**Member variables****************************************************************************
2715  LeftOperand vector_; // Left-hand side operand: the wrapped matrix/vector multiplication expression
2716  RightOperand scalar_; // Right-hand side operand: the scalar factor
2717  //**********************************************************************************************
2718 
2719  //**Assignment to dense vectors*****************************************************************
// Assignment of the scaled matrix/vector product to a dense target vector.
//   lhs: the target dense vector
//   rhs: the scaled product expression to be assigned
// A matrix without rows leaves the target untouched, a matrix without columns
// resets the target. Otherwise both operands are evaluated serially and the
// work is dispatched to selectAssignKernel() together with the scalar.
2731  template< typename VT1 > // Type of the target dense vector
2732  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2733  {
2735 
2736  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2737 
      // Unwrap the matrix and vector operands of the inner product expression.
2738  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
2739  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
2740 
2741  if( left.rows() == 0UL ) {
2742  return;
2743  }
2744  else if( left.columns() == 0UL ) {
2745  reset( ~lhs );
2746  return;
2747  }
2748 
2749  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2750  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2751 
2752  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2753  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2754  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2755  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2756 
2757  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2758  }
2759  //**********************************************************************************************
2760 
2761  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment y = A * x * scalar.
// The small-matrix kernel is chosen when the matrix type is diagonal, when MT
// is a computation that is not evaluated, or when the matrix has fewer than
// TDMATDVECMULT_THRESHOLD elements; in all other cases the BLAS kernel
// selection is used.
2772  template< typename VT1 // Type of the left-hand side target vector
2773  , typename MT1 // Type of the left-hand side matrix operand
2774  , typename VT2 // Type of the right-hand side vector operand
2775  , typename ST2 > // Type of the scalar value
2776  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2777  {
2778  if( ( IsDiagonal<MT1>::value ) ||
2779  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2780  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2781  selectSmallAssignKernel( y, A, x, scalar );
2782  else
2783  selectBlasAssignKernel( y, A, x, scalar );
2784  }
2785  //**********************************************************************************************
2786 
2787  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for the assignment y = A * x * scalar.
// The product is built column by column: the first column initialises y (for
// non-upper matrices), every further column is accumulated into y, and a final
// pass multiplies the result by the scalar. Compile-time matrix structure
// traits (IsLower/IsUpper/IsStrictly.../IsDiagonal) restrict the row ranges.
// NOTE(review): the listing omits three `if(...)` heads (listing lines 2810,
// 2823 and 2853); the orphaned braces and the `else` below belong to them.
// They presumably test IsStrictlyLower, IsDiagonal and IsStrictlyUpper
// respectively - confirm against the original header.
2801  template< typename VT1 // Type of the left-hand side target vector
2802  , typename MT1 // Type of the left-hand side matrix operand
2803  , typename VT2 // Type of the right-hand side vector operand
2804  , typename ST2 > // Type of the scalar value
2805  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2806  {
2807  const size_t M( A.rows() );
2808  const size_t N( A.columns() );
2809 
2811  reset( y[0] );
2812  }
2813 
      // First column: plain assignment initialises the touched elements of y.
2814  if( !IsUpper<MT1>::value )
2815  {
2816  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
2817  y[i] = A(i,0UL) * x[0UL];
2818  }
2819  }
2820 
      // Remaining columns (all columns for non-strictly upper matrices).
2821  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
2822  {
2824  {
      // Only the diagonal element contributes, so the scalar is applied right here.
2825  y[j] = A(j,j) * x[j] * scalar;
2826  }
2827  else
2828  {
      // Row range of column j that can hold non-zero elements.
2829  const size_t ibegin( ( IsLower<MT1>::value )
2830  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2831  :( 0UL ) );
2832  const size_t iend( ( IsUpper<MT1>::value )
2833  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
2834  :( M ) );
2835  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2836 
      // ipos is ibegin plus the largest even count not exceeding inum (two-way unrolling).
2837  const size_t inum( iend - ibegin );
2838  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2839 
2840  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2841  y[i ] += A(i ,j) * x[j];
2842  y[i+1UL] += A(i+1UL,j) * x[j];
2843  }
2844  if( ipos < iend ) {
2845  y[ipos] += A(ipos,j) * x[j];
2846  }
      // For upper matrices row iend is written for the first time in column j,
      // hence an assignment instead of an accumulation.
2847  if( IsUpper<MT1>::value ) {
2848  y[iend] = A(iend,j) * x[j];
2849  }
2850  }
2851  }
2852 
2854  reset( y[M-1UL] );
2855  }
2856 
      // Final scaling pass; skipped for diagonal matrices (already scaled above).
2857  if( !IsDiagonal<MT1>::value )
2858  {
2859  const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
2860  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
2861  y[i] *= scalar;
2862  }
2863  }
2864  }
2865  //**********************************************************************************************
2866 
2867  //**Default assignment to dense vectors (small matrices)****************************************
// Small-matrix assignment kernel, non-vectorized variant: simply forwards to
// selectDefaultAssignKernel().
// NOTE(review): the listing omits the return-type line (listing line 2885),
// presumably the SFINAE condition that selects this overload when the
// vectorized kernel is not applicable - confirm against the original header.
2881  template< typename VT1 // Type of the left-hand side target vector
2882  , typename MT1 // Type of the left-hand side matrix operand
2883  , typename VT2 // Type of the right-hand side vector operand
2884  , typename ST2 > // Type of the scalar value
2886  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2887  {
2888  selectDefaultAssignKernel( y, A, x, scalar );
2889  }
2890  //**********************************************************************************************
2891 
2892  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Small-matrix assignment kernel, vectorized variant, for y = A * x * scalar.
// Rows are processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors. For every
// chunk the relevant column range [jbegin, jend) is walked once, the partial
// sums are kept in SIMD accumulators, and the accumulators are multiplied by
// the broadcast scalar before being stored to y. Rows that do not fill a SIMD
// vector are handled by a scalar loop when the operands are not padded.
// The accumulators start from their default-constructed state (presumably
// zero - confirm against SIMDType's default constructor).
// NOTE(review): the listing omits the return-type line (listing line 2910),
// presumably the SFINAE condition selecting this overload - confirm.
2906  template< typename VT1 // Type of the left-hand side target vector
2907  , typename MT1 // Type of the left-hand side matrix operand
2908  , typename VT2 // Type of the right-hand side vector operand
2909  , typename ST2 > // Type of the scalar value
2911  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2912  {
      // A scalar remainder loop is needed unless both the matrix and the target are padded.
2913  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
2914 
2915  const size_t M( A.rows() );
2916  const size_t N( A.columns() );
2917 
      // With a remainder, ipos is M rounded down to a multiple of SIMDSIZE (see the assertion).
2918  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
2919  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
2920 
      // Scalar broadcast into a SIMD vector.
2921  const SIMDType factor( set( scalar ) );
2922 
2923  size_t i( 0UL );
2924 
      // Chunks of eight SIMD vectors.
2925  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
2926  {
      // Column range that can hold non-zero elements for the rows of this chunk.
2927  const size_t jbegin( ( IsUpper<MT1>::value )
2928  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2929  :( 0UL ) );
2930  const size_t jend( ( IsLower<MT1>::value )
2931  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2932  :( N ) );
2933  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2934 
2935  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2936 
2937  for( size_t j=jbegin; j<jend; ++j ) {
2938  const SIMDType x1( set( x[j] ) );
2939  xmm1 += A.load(i ,j) * x1;
2940  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2941  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2942  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2943  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
2944  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
2945  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
2946  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
2947  }
2948 
2949  y.store( i , xmm1*factor );
2950  y.store( i+SIMDSIZE , xmm2*factor );
2951  y.store( i+SIMDSIZE*2UL, xmm3*factor );
2952  y.store( i+SIMDSIZE*3UL, xmm4*factor );
2953  y.store( i+SIMDSIZE*4UL, xmm5*factor );
2954  y.store( i+SIMDSIZE*5UL, xmm6*factor );
2955  y.store( i+SIMDSIZE*6UL, xmm7*factor );
2956  y.store( i+SIMDSIZE*7UL, xmm8*factor );
2957  }
2958 
      // Chunks of four SIMD vectors.
2959  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
2960  {
2961  const size_t jbegin( ( IsUpper<MT1>::value )
2962  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2963  :( 0UL ) );
2964  const size_t jend( ( IsLower<MT1>::value )
2965  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2966  :( N ) );
2967  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2968 
2969  SIMDType xmm1, xmm2, xmm3, xmm4;
2970 
2971  for( size_t j=jbegin; j<jend; ++j ) {
2972  const SIMDType x1( set( x[j] ) );
2973  xmm1 += A.load(i ,j) * x1;
2974  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2975  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2976  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2977  }
2978 
2979  y.store( i , xmm1*factor );
2980  y.store( i+SIMDSIZE , xmm2*factor );
2981  y.store( i+SIMDSIZE*2UL, xmm3*factor );
2982  y.store( i+SIMDSIZE*3UL, xmm4*factor );
2983  }
2984 
      // Chunks of three SIMD vectors.
2985  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
2986  {
2987  const size_t jbegin( ( IsUpper<MT1>::value )
2988  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2989  :( 0UL ) );
2990  const size_t jend( ( IsLower<MT1>::value )
2991  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2992  :( N ) );
2993  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2994 
2995  SIMDType xmm1, xmm2, xmm3;
2996 
2997  for( size_t j=jbegin; j<jend; ++j ) {
2998  const SIMDType x1( set( x[j] ) );
2999  xmm1 += A.load(i ,j) * x1;
3000  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3001  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3002  }
3003 
3004  y.store( i , xmm1*factor );
3005  y.store( i+SIMDSIZE , xmm2*factor );
3006  y.store( i+SIMDSIZE*2UL, xmm3*factor );
3007  }
3008 
      // Chunks of two SIMD vectors.
3009  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3010  {
3011  const size_t jbegin( ( IsUpper<MT1>::value )
3012  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3013  :( 0UL ) );
3014  const size_t jend( ( IsLower<MT1>::value )
3015  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3016  :( N ) );
3017  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3018 
3019  SIMDType xmm1, xmm2;
3020 
3021  for( size_t j=jbegin; j<jend; ++j ) {
3022  const SIMDType x1( set( x[j] ) );
3023  xmm1 += A.load(i ,j) * x1;
3024  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3025  }
3026 
3027  y.store( i , xmm1*factor );
3028  y.store( i+SIMDSIZE, xmm2*factor );
3029  }
3030 
      // Single SIMD vectors.
3031  for( ; i<ipos; i+=SIMDSIZE )
3032  {
3033  const size_t jbegin( ( IsUpper<MT1>::value )
3034  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3035  :( 0UL ) );
3036  const size_t jend( ( IsLower<MT1>::value )
3037  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3038  :( N ) );
3039  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3040 
3041  SIMDType xmm1;
3042 
3043  for( size_t j=jbegin; j<jend; ++j ) {
3044  const SIMDType x1( set( x[j] ) );
3045  xmm1 += A.load(i,j) * x1;
3046  }
3047 
3048  y.store( i, xmm1*factor );
3049  }
3050 
      // Scalar tail for the rows beyond the last full SIMD vector.
3051  for( ; remainder && i<M; ++i )
3052  {
3053  const size_t jbegin( ( IsUpper<MT1>::value )
3054  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3055  :( 0UL ) );
3056  const size_t jend( ( IsLower<MT1>::value )
3057  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3058  :( N ) );
3059  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3060 
3061  ElementType value{};
3062 
3063  for( size_t j=jbegin; j<jend; ++j ) {
3064  value += A(i,j) * x[j];
3065  }
3066 
3067  y[i] = value * scalar;
3068  }
3069  }
3070  //**********************************************************************************************
3071 
3072  //**Default assignment to dense vectors (large matrices)****************************************
// Large-matrix assignment kernel, non-vectorized variant: simply forwards to
// selectDefaultAssignKernel().
// NOTE(review): the listing omits the return-type line (listing line 3090),
// presumably the SFINAE condition that selects this overload when the
// vectorized kernel is not applicable - confirm against the original header.
3086  template< typename VT1 // Type of the left-hand side target vector
3087  , typename MT1 // Type of the left-hand side matrix operand
3088  , typename VT2 // Type of the right-hand side vector operand
3089  , typename ST2 > // Type of the scalar value
3091  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3092  {
3093  selectDefaultAssignKernel( y, A, x, scalar );
3094  }
3095  //**********************************************************************************************
3096 
3097  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Large-matrix assignment kernel, vectorized variant, for y = A * x * scalar.
// The target is reset first; the matrix is then traversed in blocks of
// `iblock` rows by `jblock` columns. Within a block the rows are processed in
// chunks of 8, 4, 3, 2 and 1 SIMD vectors, and each chunk's partial sum is
// scaled by the broadcast scalar and added to the current content of y.
// The accumulators start from their default-constructed state (presumably
// zero - confirm against SIMDType's default constructor).
// NOTE(review): the listing omits the return-type line (listing line 3115),
// presumably the SFINAE condition selecting this overload - confirm.
3111  template< typename VT1 // Type of the left-hand side target vector
3112  , typename MT1 // Type of the left-hand side matrix operand
3113  , typename VT2 // Type of the right-hand side vector operand
3114  , typename ST2 > // Type of the scalar value
3116  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3117  {
      // A scalar remainder loop is needed unless both the matrix and the target are padded.
3118  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3119 
3120  const size_t M( A.rows() );
3121  const size_t N( A.columns() );
3122 
      // Row block: 32768 bytes worth of elements; column block: 8 columns if N < iblock, else 4.
3123  const size_t iblock( 32768UL / sizeof( ElementType ) );
3124  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3125 
3126  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
3127 
      // Scalar broadcast into a SIMD vector.
3128  const SIMDType factor( set( scalar ) );
3129 
      // y is accumulated into below, so it has to start from its reset state.
3130  reset( y );
3131 
3132  for( size_t ii=0U; ii<M; ii+=iblock ) {
3133  for( size_t jj=0UL; jj<N; jj+=jblock )
3134  {
      // Block limits; for upper matrices the row end is additionally capped by the column end.
3135  const size_t jend( min( jj+jblock, N ) );
3136  const size_t itmp( min( ii+iblock, M ) );
3137  const size_t iend( ( IsUpper<MT1>::value )
3138  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3139  :( itmp ) );
3140 
      // With a remainder, ipos is iend rounded down to a multiple of SIMDSIZE (see the assertion).
3141  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
3142  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3143 
      // For lower matrices the first row is derived from the first column of the
      // block, rounded down to a multiple of SIMDSIZE, but never before ii.
3144  size_t i( ( IsLower<MT1>::value )
3145  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
3146  :( ii ) );
3147 
      // Chunks of eight SIMD vectors.
3148  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3149  {
3150  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3151 
3152  for( size_t j=jj; j<jend; ++j ) {
3153  const SIMDType x1( set( x[j] ) );
3154  xmm1 += A.load(i ,j) * x1;
3155  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3156  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3157  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3158  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3159  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3160  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3161  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3162  }
3163 
3164  y.store( i , y.load(i ) + xmm1*factor );
3165  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3166  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3167  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3168  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3169  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3170  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3171  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3172  }
3173 
      // Chunks of four SIMD vectors.
3174  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3175  {
3176  SIMDType xmm1, xmm2, xmm3, xmm4;
3177 
3178  for( size_t j=jj; j<jend; ++j ) {
3179  const SIMDType x1( set( x[j] ) );
3180  xmm1 += A.load(i ,j) * x1;
3181  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3182  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3183  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3184  }
3185 
3186  y.store( i , y.load(i ) + xmm1*factor );
3187  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3188  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3189  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3190  }
3191 
      // Chunks of three SIMD vectors.
3192  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3193  {
3194  SIMDType xmm1, xmm2, xmm3;
3195 
3196  for( size_t j=jj; j<jend; ++j ) {
3197  const SIMDType x1( set( x[j] ) );
3198  xmm1 += A.load(i ,j) * x1;
3199  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3200  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3201  }
3202 
3203  y.store( i , y.load(i ) + xmm1*factor );
3204  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3205  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3206  }
3207 
      // Chunks of two SIMD vectors.
3208  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3209  {
3210  SIMDType xmm1, xmm2;
3211 
3212  for( size_t j=jj; j<jend; ++j ) {
3213  const SIMDType x1( set( x[j] ) );
3214  xmm1 += A.load(i ,j) * x1;
3215  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3216  }
3217 
3218  y.store( i , y.load(i ) + xmm1*factor );
3219  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3220  }
3221 
      // Single SIMD vectors.
3222  for( ; i<ipos; i+=SIMDSIZE )
3223  {
3224  SIMDType xmm1;
3225 
3226  for( size_t j=jj; j<jend; ++j ) {
3227  xmm1 += A.load(i,j) * set( x[j] );
3228  }
3229 
3230  y.store( i, y.load(i) + xmm1*factor );
3231  }
3232 
      // Scalar tail for the rows beyond the last full SIMD vector of this block.
3233  for( ; remainder && i<iend; ++i )
3234  {
3235  ElementType value{};
3236 
3237  for( size_t j=jj; j<jend; ++j ) {
3238  value += A(i,j) * x[j];
3239  }
3240 
3241  y[i] += value * scalar;
3242  }
3243  }
3244  }
3245  }
3246  //**********************************************************************************************
3247 
3248  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel, fallback variant: no BLAS call is made here, the
// work is forwarded to selectLargeAssignKernel().
// NOTE(review): the listing omits the return-type line (listing line 3266),
// presumably the SFINAE condition that selects this overload when the BLAS
// kernel is not applicable - confirm against the original header.
3262  template< typename VT1 // Type of the left-hand side target vector
3263  , typename MT1 // Type of the left-hand side matrix operand
3264  , typename VT2 // Type of the right-hand side vector operand
3265  , typename ST2 > // Type of the scalar value
3267  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3268  {
3269  selectLargeAssignKernel( y, A, x, scalar );
3270  }
3271  //**********************************************************************************************
3272 
3273  //**BLAS-based assignment to dense vectors******************************************************
3274 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
3275 
// BLAS-based assignment kernel for y = A * x * scalar; only compiled when
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are set.
// Triangular matrices: y is first assigned the scaled vector and trmv() is
// then called on y with the matching lower/upper flag. All other matrices:
// gemv() is called with ET(scalar) and ET(0) as trailing arguments (presumably
// the alpha and beta factors - confirm against blaze/math/blas/gemv.h).
// NOTE(review): the listing omits the return-type line (listing line 3292),
// presumably the SFINAE condition selecting this overload - confirm.
3288  template< typename VT1 // Type of the left-hand side target vector
3289  , typename MT1 // Type of the left-hand side matrix operand
3290  , typename VT2 // Type of the right-hand side vector operand
3291  , typename ST2 > // Type of the scalar value
3293  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3294  {
3295  using ET = ElementType_<VT1>;
3296 
3297  if( IsTriangular<MT1>::value ) {
3298  assign( y, scalar * x );
3299  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3300  }
3301  else {
3302  gemv( y, A, x, ET(scalar), ET(0) );
3303  }
3304  }
3305 #endif
3306  //**********************************************************************************************
3307 
3308  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product to a sparse target vector.
//   lhs: the target sparse vector
//   rhs: the scaled product expression to be assigned
// The expression is evaluated serially into a temporary of type ResultType,
// which is then assigned to the target.
3320  template< typename VT1 > // Type of the target sparse vector
3321  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3322  {
3324 
3328 
3329  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3330 
3331  const ResultType tmp( serial( rhs ) );
3332  assign( ~lhs, tmp );
3333  }
3334  //**********************************************************************************************
3335 
3336  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product to a dense target vector.
//   lhs: the target dense vector
//   rhs: the scaled product expression to be added
// A matrix without rows or without columns leaves the target untouched.
// Otherwise both operands are evaluated serially and the work is dispatched
// to selectAddAssignKernel() together with the scalar.
3348  template< typename VT1 > // Type of the target dense vector
3349  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3350  {
3352 
3353  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3354 
      // Unwrap the matrix and vector operands of the inner product expression.
3355  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
3356  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
3357 
      // An empty product contributes nothing to the target.
3358  if( left.rows() == 0UL || left.columns() == 0UL ) {
3359  return;
3360  }
3361 
3362  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3363  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3364 
3365  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3366  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3367  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3368  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3369 
3370  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3371  }
3372  //**********************************************************************************************
3373 
3374  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment y += A * x * scalar.
// The "small" kernel is chosen if any of the following holds:
//  - MT1 is a diagonal matrix type,
//  - the original matrix operand type MT is a computation and 'evaluateMatrix'
//    is false,
//  - the number of matrix elements (rows * columns) is below
//    TDMATDVECMULT_THRESHOLD.
// In all other cases the BLAS kernel selection is used.
3385  template< typename VT1 // Type of the left-hand side target vector
3386  , typename MT1 // Type of the left-hand side matrix operand
3387  , typename VT2 // Type of the right-hand side vector operand
3388  , typename ST2 > // Type of the scalar value
3389  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3390  {
      // Note: the second test inspects MT (template parameter of the enclosing
      // class), not the evaluated operand type MT1.
3391  if( ( IsDiagonal<MT1>::value ) ||
3392  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3393  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3394  selectSmallAddAssignKernel( y, A, x, scalar );
3395  else
3396  selectBlasAddAssignKernel( y, A, x, scalar );
3397  }
3398  //**********************************************************************************************
3399 
3400  //**Default addition assignment to dense vectors************************************************
// Default (non-specialized) addition-assignment kernel.
// Builds the expression A * x * scalar and hands it to the addAssign() member
// of the target vector y; no manual loop is written here.
3414  template< typename VT1 // Type of the left-hand side target vector
3415  , typename MT1 // Type of the left-hand side matrix operand
3416  , typename VT2 // Type of the right-hand side vector operand
3417  , typename ST2 > // Type of the scalar value
3418  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3419  {
3420  y.addAssign( A * x * scalar );
3421  }
3422  //**********************************************************************************************
3423 
3424  //**Default addition assignment to dense vectors (small matrices)*******************************
// Small-matrix addition-assignment kernel, non-vectorized variant.
// Simply forwards all arguments to selectDefaultAddAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 3442) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the vectorized one below; confirm
// against the original header.
3438  template< typename VT1 // Type of the left-hand side target vector
3439  , typename MT1 // Type of the left-hand side matrix operand
3440  , typename VT2 // Type of the right-hand side vector operand
3441  , typename ST2 > // Type of the scalar value
3443  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3444  {
3445  selectDefaultAddAssignKernel( y, A, x, scalar );
3446  }
3447  //**********************************************************************************************
3448 
3449  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Small-matrix addition-assignment kernel, vectorized variant:
// y += ( A * x ) * scalar.
// The rows of A are processed in packs of SIMDSIZE elements. The outer loops
// are unrolled to handle 8, 4, 3, 2 and finally 1 pack(s) of rows per
// iteration; a scalar loop takes care of the rows that do not fill a complete
// pack. For matrix types flagged as upper/lower the column range
// [jbegin,jend) is narrowed for every row block.
// NOTE(review): the accumulators xmm1..xmm8 are default-constructed and then
// only updated with '+=' — this assumes SIMDType's default constructor yields
// a zero pack; confirm.
// NOTE(review): the declaration line carrying the return type (listing line
// 3468) is not present in this listing; confirm against the original header.
3464  template< typename VT1 // Type of the left-hand side target vector
3465  , typename MT1 // Type of the left-hand side matrix operand
3466  , typename VT2 // Type of the right-hand side vector operand
3467  , typename ST2 > // Type of the scalar value
3469  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3470  {
      // A scalar tail loop is required unless both the matrix and the target
      // vector are padded.
3471  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3472 
3473  const size_t M( A.rows() );
3474  const size_t N( A.columns() );
3475 
      // End of the SIMD-processed row range: M rounded down to a multiple of
      // SIMDSIZE if a tail loop exists (see the assertion), otherwise M itself.
3476  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
3477  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3478 
      // The scalar factor, replicated into a SIMD pack.
3479  const SIMDType factor( set( scalar ) );
3480 
3481  size_t i( 0UL );
3482 
      // 8 packs of rows per iteration.
3483  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3484  {
      // Column range for this row block: starts at the diagonal for upper
      // matrices and ends at the last row of the block for lower matrices.
3485  const size_t jbegin( ( IsUpper<MT1>::value )
3486  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3487  :( 0UL ) );
3488  const size_t jend( ( IsLower<MT1>::value )
3489  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3490  :( N ) );
3491  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3492 
3493  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3494 
      // Accumulate column j of A, weighted by x[j], into the eight partial sums.
3495  for( size_t j=jbegin; j<jend; ++j ) {
3496  const SIMDType x1( set( x[j] ) );
3497  xmm1 += A.load(i ,j) * x1;
3498  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3499  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3500  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3501  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3502  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3503  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3504  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3505  }
3506 
      // y[block] += partial sum * scalar
3507  y.store( i , y.load(i ) + xmm1*factor );
3508  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3509  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3510  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3511  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3512  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3513  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3514  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3515  }
3516 
      // 4 packs of rows per iteration.
3517  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3518  {
3519  const size_t jbegin( ( IsUpper<MT1>::value )
3520  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3521  :( 0UL ) );
3522  const size_t jend( ( IsLower<MT1>::value )
3523  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3524  :( N ) );
3525  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3526 
3527  SIMDType xmm1, xmm2, xmm3, xmm4;
3528 
3529  for( size_t j=jbegin; j<jend; ++j ) {
3530  const SIMDType x1( set( x[j] ) );
3531  xmm1 += A.load(i ,j) * x1;
3532  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3533  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3534  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3535  }
3536 
3537  y.store( i , y.load(i ) + xmm1*factor );
3538  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3539  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3540  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3541  }
3542 
      // 3 packs of rows per iteration.
3543  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3544  {
3545  const size_t jbegin( ( IsUpper<MT1>::value )
3546  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3547  :( 0UL ) );
3548  const size_t jend( ( IsLower<MT1>::value )
3549  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3550  :( N ) );
3551  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3552 
3553  SIMDType xmm1, xmm2, xmm3;
3554 
3555  for( size_t j=jbegin; j<jend; ++j ) {
3556  const SIMDType x1( set( x[j] ) );
3557  xmm1 += A.load(i ,j) * x1;
3558  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3559  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3560  }
3561 
3562  y.store( i , y.load(i ) + xmm1*factor );
3563  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3564  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3565  }
3566 
      // 2 packs of rows per iteration.
3567  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3568  {
3569  const size_t jbegin( ( IsUpper<MT1>::value )
3570  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3571  :( 0UL ) );
3572  const size_t jend( ( IsLower<MT1>::value )
3573  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3574  :( N ) );
3575  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3576 
3577  SIMDType xmm1, xmm2;
3578 
3579  for( size_t j=jbegin; j<jend; ++j ) {
3580  const SIMDType x1( set( x[j] ) );
3581  xmm1 += A.load(i ,j) * x1;
3582  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3583  }
3584 
3585  y.store( i , y.load(i ) + xmm1*factor );
3586  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3587  }
3588 
      // A single pack of rows per iteration.
3589  for( ; i<ipos; i+=SIMDSIZE )
3590  {
3591  const size_t jbegin( ( IsUpper<MT1>::value )
3592  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3593  :( 0UL ) );
3594  const size_t jend( ( IsLower<MT1>::value )
3595  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3596  :( N ) );
3597  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3598 
3599  SIMDType xmm1;
3600 
3601  for( size_t j=jbegin; j<jend; ++j ) {
3602  xmm1 += A.load(i,j) * set( x[j] );
3603  }
3604 
3605  y.store( i, y.load(i) + xmm1*factor );
3606  }
3607 
      // Scalar tail: remaining rows [ipos,M), one element at a time. The loop is
      // compiled out of the picture when no remainder handling is needed.
3608  for( ; remainder && i<M; ++i )
3609  {
3610  const size_t jbegin( ( IsUpper<MT1>::value )
3611  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3612  :( 0UL ) );
3613  const size_t jend( ( IsLower<MT1>::value )
3614  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3615  :( N ) );
3616  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3617 
3618  ElementType value{};
3619 
3620  for( size_t j=jbegin; j<jend; ++j ) {
3621  value += A(i,j) * x[j];
3622  }
3623 
3624  y[i] += value * scalar;
3625  }
3626  }
3627  //**********************************************************************************************
3628 
3629  //**Default addition assignment to dense vectors (large matrices)*******************************
// Large-matrix addition-assignment kernel, non-vectorized variant.
// Simply forwards all arguments to selectDefaultAddAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 3647) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the vectorized one; confirm
// against the original header.
3643  template< typename VT1 // Type of the left-hand side target vector
3644  , typename MT1 // Type of the left-hand side matrix operand
3645  , typename VT2 // Type of the right-hand side vector operand
3646  , typename ST2 > // Type of the scalar value
3648  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3649  {
3650  selectDefaultAddAssignKernel( y, A, x, scalar );
3651  }
3652  //**********************************************************************************************
3653 
3654  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Large-matrix addition-assignment kernel, vectorized and tiled variant:
// y += ( A * x ) * scalar.
// The iteration space is split into tiles of 'iblock' rows and 'jblock'
// columns. For every tile the products are accumulated in SIMD packs (with the
// same 8/4/3/2/1 unrolling as the small kernel) and added to y right away, so
// every element of y is updated once per column tile.
// NOTE(review): the accumulators are default-constructed and only updated with
// '+=' — this assumes SIMDType's default constructor yields a zero pack; confirm.
// NOTE(review): the declaration line carrying the return type (listing line
// 3673) is not present in this listing; confirm against the original header.
3669  template< typename VT1 // Type of the left-hand side target vector
3670  , typename MT1 // Type of the left-hand side matrix operand
3671  , typename VT2 // Type of the right-hand side vector operand
3672  , typename ST2 > // Type of the scalar value
3674  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3675  {
      // A scalar tail loop is required unless both the matrix and the target
      // vector are padded.
3676  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3677 
3678  const size_t M( A.rows() );
3679  const size_t N( A.columns() );
3680 
      // Tile sizes: 32768 bytes worth of elements per row block (presumably
      // chosen with a cache size in mind — confirm); 8 columns per tile if the
      // matrix has fewer columns than iblock, otherwise 4.
3681  const size_t iblock( 32768UL / sizeof( ElementType ) );
3682  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3683 
3684  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
3685 
      // The scalar factor, replicated into a SIMD pack.
3686  const SIMDType factor( set( scalar ) );
3687 
3688  for( size_t ii=0U; ii<M; ii+=iblock ) {
3689  for( size_t jj=0UL; jj<N; jj+=jblock )
3690  {
      // Tile bounds. For upper matrices the row range is cut off at the last
      // column of the tile (one less for strictly upper matrices).
3691  const size_t jend( min( jj+jblock, N ) );
3692  const size_t itmp( min( ii+iblock, M ) );
3693  const size_t iend( ( IsUpper<MT1>::value )
3694  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3695  :( itmp ) );
3696 
      // End of the SIMD-processed rows of this tile (iend rounded down to a
      // multiple of SIMDSIZE if a tail loop exists, see the assertion).
3697  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
3698  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3699 
      // First row of the tile. For lower matrices the rows above the first
      // column of the tile are skipped; the start is masked down to a multiple
      // of SIMDSIZE and never lies before ii.
3700  size_t i( ( IsLower<MT1>::value )
3701  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
3702  :( ii ) );
3703 
      // 8 packs of rows per iteration.
3704  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3705  {
3706  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3707 
3708  for( size_t j=jj; j<jend; ++j ) {
3709  const SIMDType x1( set( x[j] ) );
3710  xmm1 += A.load(i ,j) * x1;
3711  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3712  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3713  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3714  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3715  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3716  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3717  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3718  }
3719 
      // y[block] += partial sum of this column tile * scalar
3720  y.store( i , y.load(i ) + xmm1*factor );
3721  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3722  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3723  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3724  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3725  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3726  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3727  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3728  }
3729 
      // 4 packs of rows per iteration.
3730  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3731  {
3732  SIMDType xmm1, xmm2, xmm3, xmm4;
3733 
3734  for( size_t j=jj; j<jend; ++j ) {
3735  const SIMDType x1( set( x[j] ) );
3736  xmm1 += A.load(i ,j) * x1;
3737  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3738  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3739  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3740  }
3741 
3742  y.store( i , y.load(i ) + xmm1*factor );
3743  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3744  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3745  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3746  }
3747 
      // 3 packs of rows per iteration.
3748  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3749  {
3750  SIMDType xmm1, xmm2, xmm3;
3751 
3752  for( size_t j=jj; j<jend; ++j ) {
3753  const SIMDType x1( set( x[j] ) );
3754  xmm1 += A.load(i ,j) * x1;
3755  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3756  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3757  }
3758 
3759  y.store( i , y.load(i ) + xmm1*factor );
3760  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3761  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3762  }
3763 
      // 2 packs of rows per iteration.
3764  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3765  {
3766  SIMDType xmm1, xmm2;
3767 
3768  for( size_t j=jj; j<jend; ++j ) {
3769  const SIMDType x1( set( x[j] ) );
3770  xmm1 += A.load(i ,j) * x1;
3771  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3772  }
3773 
3774  y.store( i , y.load(i ) + xmm1*factor );
3775  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3776  }
3777 
      // A single pack of rows per iteration.
3778  for( ; i<ipos; i+=SIMDSIZE )
3779  {
3780  SIMDType xmm1;
3781 
3782  for( size_t j=jj; j<jend; ++j ) {
3783  xmm1 += A.load(i,j) * set( x[j] );
3784  }
3785 
3786  y.store( i, y.load(i) + xmm1*factor );
3787  }
3788 
      // Scalar tail for the rows of this tile that do not fill a complete pack.
3789  for( ; remainder && i<iend; ++i )
3790  {
3791  ElementType value{};
3792 
3793  for( size_t j=jj; j<jend; ++j ) {
3794  value += A(i,j) * x[j];
3795  }
3796 
3797  y[i] += value * scalar;
3798  }
3799  }
3800  }
3801  }
3802  //**********************************************************************************************
3803 
3804  //**BLAS-based addition assignment to dense vectors (default)***********************************
// BLAS addition-assignment kernel, fallback variant.
// Does not call any BLAS routine; it forwards all arguments to
// selectLargeAddAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 3822) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the BLAS-backed one; confirm
// against the original header.
3818  template< typename VT1 // Type of the left-hand side target vector
3819  , typename MT1 // Type of the left-hand side matrix operand
3820  , typename VT2 // Type of the right-hand side vector operand
3821  , typename ST2 > // Type of the scalar value
3823  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3824  {
3825  selectLargeAddAssignKernel( y, A, x, scalar );
3826  }
3827  //**********************************************************************************************
3828 
3829  //**BLAS-based addition assignment to dense vectors*********************************************
3830 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
3831 
// BLAS-backed addition-assignment kernel. Only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are set
// (see the enclosing #if).
//  - Triangular matrices: the scaled vector scalar * x is copied into a
//    temporary, multiplied in place by A via trmv(), and the result is added
//    to y.
//  - All other matrices: a single gemv() call with the factors ET(scalar) and
//    ET(1) (presumably alpha and beta of y = alpha*A*x + beta*y — confirm
//    against the gemv wrapper).
// NOTE(review): the declaration line carrying the return type (listing line
// 3848) is not present in this listing; confirm against the original header.
3844  template< typename VT1 // Type of the left-hand side target vector
3845  , typename MT1 // Type of the left-hand side matrix operand
3846  , typename VT2 // Type of the right-hand side vector operand
3847  , typename ST2 > // Type of the scalar value
3849  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3850  {
3851  using ET = ElementType_<VT1>;
3852 
3853  if( IsTriangular<MT1>::value ) {
3854  ResultType_<VT1> tmp( serial( scalar * x ) );
3855  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3856  addAssign( y, tmp );
3857  }
3858  else {
3859  gemv( y, A, x, ET(scalar), ET(1) );
3860  }
3861  }
3862 #endif
3863  //**********************************************************************************************
3864 
3865  //**Addition assignment to sparse vectors*******************************************************
3866  // No special implementation for the addition assignment to sparse vectors.
3867  //**********************************************************************************************
3868 
3869  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product 'rhs' to the
// dense vector 'lhs'. The function pulls the matrix and vector operands out of
// rhs.vector_, evaluates both, and forwards them together with rhs.scalar_ to
// the kernel selection function selectSubAssignKernel().
3881  template< typename VT1 > // Type of the target dense vector
3882  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3883  {
3885 
3886  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3887 
3888  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
3889  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
3890 
      // An empty matrix contributes nothing, so the target is left untouched.
3891  if( left.rows() == 0UL || left.columns() == 0UL ) {
3892  return;
3893  }
3894 
3895  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3896  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3897 
      // Sanity checks: evaluation must not change any dimension.
3898  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3899  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3900  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3901  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3902 
3903  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3904  }
3905  //**********************************************************************************************
3906 
3907  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment y -= A * x * scalar.
// The "small" kernel is chosen if any of the following holds:
//  - MT1 is a diagonal matrix type,
//  - the original matrix operand type MT is a computation and 'evaluateMatrix'
//    is false,
//  - the number of matrix elements (rows * columns) is below
//    TDMATDVECMULT_THRESHOLD.
// In all other cases the BLAS kernel selection is used.
3918  template< typename VT1 // Type of the left-hand side target vector
3919  , typename MT1 // Type of the left-hand side matrix operand
3920  , typename VT2 // Type of the right-hand side vector operand
3921  , typename ST2 > // Type of the scalar value
3922  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3923  {
      // Note: the second test inspects MT (template parameter of the enclosing
      // class), not the evaluated operand type MT1.
3924  if( ( IsDiagonal<MT1>::value ) ||
3925  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3926  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3927  selectSmallSubAssignKernel( y, A, x, scalar );
3928  else
3929  selectBlasSubAssignKernel( y, A, x, scalar );
3930  }
3931  //**********************************************************************************************
3932 
3933  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-specialized) subtraction-assignment kernel.
// Builds the expression A * x * scalar and hands it to the subAssign() member
// of the target vector y; no manual loop is written here.
3947  template< typename VT1 // Type of the left-hand side target vector
3948  , typename MT1 // Type of the left-hand side matrix operand
3949  , typename VT2 // Type of the right-hand side vector operand
3950  , typename ST2 > // Type of the scalar value
3951  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3952  {
3953  y.subAssign( A * x * scalar );
3954  }
3955  //**********************************************************************************************
3956 
3957  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Small-matrix subtraction-assignment kernel, non-vectorized variant.
// Simply forwards all arguments to selectDefaultSubAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 3975) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the vectorized one below; confirm
// against the original header.
3971  template< typename VT1 // Type of the left-hand side target vector
3972  , typename MT1 // Type of the left-hand side matrix operand
3973  , typename VT2 // Type of the right-hand side vector operand
3974  , typename ST2 > // Type of the scalar value
3976  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3977  {
3978  selectDefaultSubAssignKernel( y, A, x, scalar );
3979  }
3980  //**********************************************************************************************
3981 
3982  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Small-matrix subtraction-assignment kernel, vectorized variant:
// y -= ( A * x ) * scalar.
// The rows of A are processed in packs of SIMDSIZE elements. The outer loops
// are unrolled to handle 8, 4, 3, 2 and finally 1 pack(s) of rows per
// iteration; a scalar loop takes care of the rows that do not fill a complete
// pack. For matrix types flagged as upper/lower the column range
// [jbegin,jend) is narrowed for every row block.
// NOTE(review): the accumulators xmm1..xmm8 are default-constructed and then
// only updated with '+=' — this assumes SIMDType's default constructor yields
// a zero pack; confirm.
// NOTE(review): the declaration line carrying the return type (listing line
// 4001) is not present in this listing; confirm against the original header.
3997  template< typename VT1 // Type of the left-hand side target vector
3998  , typename MT1 // Type of the left-hand side matrix operand
3999  , typename VT2 // Type of the right-hand side vector operand
4000  , typename ST2 > // Type of the scalar value
4002  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4003  {
      // A scalar tail loop is required unless both the matrix and the target
      // vector are padded.
4004  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4005 
4006  const size_t M( A.rows() );
4007  const size_t N( A.columns() );
4008 
      // End of the SIMD-processed row range: M rounded down to a multiple of
      // SIMDSIZE if a tail loop exists (see the assertion), otherwise M itself.
4009  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
4010  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
4011 
      // The scalar factor, replicated into a SIMD pack.
4012  const SIMDType factor( set( scalar ) );
4013 
4014  size_t i( 0UL );
4015 
      // 8 packs of rows per iteration.
4016  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
4017  {
      // Column range for this row block: starts at the diagonal for upper
      // matrices and ends at the last row of the block for lower matrices.
4018  const size_t jbegin( ( IsUpper<MT1>::value )
4019  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4020  :( 0UL ) );
4021  const size_t jend( ( IsLower<MT1>::value )
4022  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4023  :( N ) );
4024  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4025 
4026  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4027 
      // Accumulate column j of A, weighted by x[j], into the eight partial sums.
4028  for( size_t j=jbegin; j<jend; ++j ) {
4029  const SIMDType x1( set( x[j] ) );
4030  xmm1 += A.load(i ,j) * x1;
4031  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4032  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4033  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4034  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
4035  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
4036  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
4037  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
4038  }
4039 
      // y[block] -= partial sum * scalar
4040  y.store( i , y.load(i ) - xmm1*factor );
4041  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4042  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4043  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4044  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5*factor );
4045  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6*factor );
4046  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7*factor );
4047  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8*factor );
4048  }
4049 
      // 4 packs of rows per iteration.
4050  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
4051  {
4052  const size_t jbegin( ( IsUpper<MT1>::value )
4053  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4054  :( 0UL ) );
4055  const size_t jend( ( IsLower<MT1>::value )
4056  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4057  :( N ) );
4058  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4059 
4060  SIMDType xmm1, xmm2, xmm3, xmm4;
4061 
4062  for( size_t j=jbegin; j<jend; ++j ) {
4063  const SIMDType x1( set( x[j] ) );
4064  xmm1 += A.load(i ,j) * x1;
4065  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4066  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4067  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4068  }
4069 
4070  y.store( i , y.load(i ) - xmm1*factor );
4071  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4072  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4073  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4074  }
4075 
      // 3 packs of rows per iteration.
4076  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
4077  {
4078  const size_t jbegin( ( IsUpper<MT1>::value )
4079  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4080  :( 0UL ) );
4081  const size_t jend( ( IsLower<MT1>::value )
4082  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4083  :( N ) );
4084  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4085 
4086  SIMDType xmm1, xmm2, xmm3;
4087 
4088  for( size_t j=jbegin; j<jend; ++j ) {
4089  const SIMDType x1( set( x[j] ) );
4090  xmm1 += A.load(i ,j) * x1;
4091  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4092  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4093  }
4094 
4095  y.store( i , y.load(i ) - xmm1*factor );
4096  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4097  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4098  }
4099 
      // 2 packs of rows per iteration.
4100  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
4101  {
4102  const size_t jbegin( ( IsUpper<MT1>::value )
4103  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4104  :( 0UL ) );
4105  const size_t jend( ( IsLower<MT1>::value )
4106  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4107  :( N ) );
4108  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4109 
4110  SIMDType xmm1, xmm2;
4111 
4112  for( size_t j=jbegin; j<jend; ++j ) {
4113  const SIMDType x1( set( x[j] ) );
4114  xmm1 += A.load(i ,j) * x1;
4115  xmm2 += A.load(i+SIMDSIZE,j) * x1;
4116  }
4117 
4118  y.store( i , y.load(i ) - xmm1*factor );
4119  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2*factor );
4120  }
4121 
      // A single pack of rows per iteration.
4122  for( ; i<ipos; i+=SIMDSIZE )
4123  {
4124  const size_t jbegin( ( IsUpper<MT1>::value )
4125  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4126  :( 0UL ) );
4127  const size_t jend( ( IsLower<MT1>::value )
4128  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4129  :( N ) );
4130  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4131 
4132  SIMDType xmm1;
4133 
4134  for( size_t j=jbegin; j<jend; ++j ) {
4135  xmm1 += A.load(i,j) * set( x[j] );
4136  }
4137 
4138  y.store( i, y.load(i) - xmm1*factor );
4139  }
4140 
      // Scalar tail: remaining rows [ipos,M), one element at a time.
4141  for( ; remainder && i<M; ++i )
4142  {
4143  const size_t jbegin( ( IsUpper<MT1>::value )
4144  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4145  :( 0UL ) );
4146  const size_t jend( ( IsLower<MT1>::value )
4147  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4148  :( N ) );
4149  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4150 
4151  ElementType value{};
4152 
4153  for( size_t j=jbegin; j<jend; ++j ) {
4154  value += A(i,j) * x[j];
4155  }
4156 
4157  y[i] -= value * scalar;
4158  }
4159  }
4160  //**********************************************************************************************
4161 
4162  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Large-matrix subtraction-assignment kernel, non-vectorized variant.
// Simply forwards all arguments to selectDefaultSubAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 4180) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the vectorized one; confirm
// against the original header.
4176  template< typename VT1 // Type of the left-hand side target vector
4177  , typename MT1 // Type of the left-hand side matrix operand
4178  , typename VT2 // Type of the right-hand side vector operand
4179  , typename ST2 > // Type of the scalar value
4181  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4182  {
4183  selectDefaultSubAssignKernel( y, A, x, scalar );
4184  }
4185  //**********************************************************************************************
4186 
4187  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Large-matrix subtraction-assignment kernel, vectorized and tiled variant:
// y -= ( A * x ) * scalar.
// The iteration space is split into tiles of 'iblock' rows and 'jblock'
// columns. For every tile the products are accumulated in SIMD packs (with the
// same 8/4/3/2/1 unrolling as the small kernel) and subtracted from y right
// away, so every element of y is updated once per column tile.
// NOTE(review): the accumulators are default-constructed and only updated with
// '+=' — this assumes SIMDType's default constructor yields a zero pack; confirm.
// NOTE(review): the declaration line carrying the return type (listing line
// 4206) is not present in this listing; confirm against the original header.
4202  template< typename VT1 // Type of the left-hand side target vector
4203  , typename MT1 // Type of the left-hand side matrix operand
4204  , typename VT2 // Type of the right-hand side vector operand
4205  , typename ST2 > // Type of the scalar value
4207  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4208  {
      // A scalar tail loop is required unless both the matrix and the target
      // vector are padded.
4209  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4210 
4211  const size_t M( A.rows() );
4212  const size_t N( A.columns() );
4213 
      // Tile sizes: 32768 bytes worth of elements per row block (presumably
      // chosen with a cache size in mind — confirm); 8 columns per tile if the
      // matrix has fewer columns than iblock, otherwise 4.
4214  const size_t iblock( 32768UL / sizeof( ElementType ) );
4215  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4216 
4217  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
4218 
      // The scalar factor, replicated into a SIMD pack.
4219  const SIMDType factor( set( scalar ) );
4220 
4221  for( size_t ii=0U; ii<M; ii+=iblock ) {
4222  for( size_t jj=0UL; jj<N; jj+=jblock )
4223  {
      // Tile bounds. For upper matrices the row range is cut off at the last
      // column of the tile (one less for strictly upper matrices).
4224  const size_t jend( min( jj+jblock, N ) );
4225  const size_t itmp( min( ii+iblock, M ) );
4226  const size_t iend( ( IsUpper<MT1>::value )
4227  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
4228  :( itmp ) );
4229 
      // End of the SIMD-processed rows of this tile (iend rounded down to a
      // multiple of SIMDSIZE if a tail loop exists, see the assertion).
4230  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
4231  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
4232 
      // First row of the tile. For lower matrices the rows above the first
      // column of the tile are skipped; the start is masked down to a multiple
      // of SIMDSIZE and never lies before ii.
4233  size_t i( ( IsLower<MT1>::value )
4234  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
4235  :( ii ) );
4236 
      // 8 packs of rows per iteration.
4237  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
4238  {
4239  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4240 
4241  for( size_t j=jj; j<jend; ++j ) {
4242  const SIMDType x1( set( x[j] ) );
4243  xmm1 += A.load(i ,j) * x1;
4244  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4245  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4246  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4247  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
4248  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
4249  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
4250  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
4251  }
4252 
      // y[block] -= partial sum of this column tile * scalar
4253  y.store( i , y.load(i ) - xmm1*factor );
4254  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4255  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4256  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4257  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5*factor );
4258  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6*factor );
4259  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7*factor );
4260  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8*factor );
4261  }
4262 
      // 4 packs of rows per iteration.
4263  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
4264  {
4265  SIMDType xmm1, xmm2, xmm3, xmm4;
4266 
4267  for( size_t j=jj; j<jend; ++j ) {
4268  const SIMDType x1( set( x[j] ) );
4269  xmm1 += A.load(i ,j) * x1;
4270  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4271  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4272  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4273  }
4274 
4275  y.store( i , y.load(i ) - xmm1*factor );
4276  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4277  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4278  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4279  }
4280 
      // 3 packs of rows per iteration.
4281  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
4282  {
4283  SIMDType xmm1, xmm2, xmm3;
4284 
4285  for( size_t j=jj; j<jend; ++j ) {
4286  const SIMDType x1( set( x[j] ) );
4287  xmm1 += A.load(i ,j) * x1;
4288  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4289  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4290  }
4291 
4292  y.store( i , y.load(i ) - xmm1*factor );
4293  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4294  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4295  }
4296 
      // 2 packs of rows per iteration.
4297  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
4298  {
4299  SIMDType xmm1, xmm2;
4300 
4301  for( size_t j=jj; j<jend; ++j ) {
4302  const SIMDType x1( set( x[j] ) );
4303  xmm1 += A.load(i ,j) * x1;
4304  xmm2 += A.load(i+SIMDSIZE,j) * x1;
4305  }
4306 
4307  y.store( i , y.load(i ) - xmm1*factor );
4308  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2*factor );
4309  }
4310 
      // A single pack of rows per iteration.
4311  for( ; i<ipos; i+=SIMDSIZE )
4312  {
4313  SIMDType xmm1;
4314 
4315  for( size_t j=jj; j<jend; ++j ) {
4316  xmm1 += A.load(i,j) * set( x[j] );
4317  }
4318 
4319  y.store( i, y.load(i) - xmm1*factor );
4320  }
4321 
      // Scalar tail for the rows of this tile that do not fill a complete pack.
4322  for( ; remainder && i<iend; ++i )
4323  {
4324  ElementType value{};
4325 
4326  for( size_t j=jj; j<jend; ++j ) {
4327  value += A(i,j) * x[j];
4328  }
4329 
4330  y[i] -= value * scalar;
4331  }
4332  }
4333  }
4334  }
4335  //**********************************************************************************************
4336 
4337  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// BLAS subtraction-assignment kernel, fallback variant.
// Does not call any BLAS routine; it forwards all arguments to
// selectLargeSubAssignKernel().
// NOTE(review): the declaration line carrying the return type (listing line
// 4355) is not present in this listing — presumably it holds the compile-time
// condition that separates this overload from the BLAS-backed one; confirm
// against the original header.
4351  template< typename VT1 // Type of the left-hand side target vector
4352  , typename MT1 // Type of the left-hand side matrix operand
4353  , typename VT2 // Type of the right-hand side vector operand
4354  , typename ST2 > // Type of the scalar value
4356  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4357  {
4358  selectLargeSubAssignKernel( y, A, x, scalar );
4359  }
4360  //**********************************************************************************************
4361 
4362  //**BLAS-based subtraction assignment to dense vectors******************************************
4363 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
4364 
// BLAS-backed subtraction-assignment kernel. Only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are set
// (see the enclosing #if).
//  - Triangular matrices: the scaled vector scalar * x is copied into a
//    temporary, multiplied in place by A via trmv(), and the result is
//    subtracted from y.
//  - All other matrices: a single gemv() call with the negated scalar
//    ET(-scalar) and ET(1) (presumably alpha and beta of
//    y = alpha*A*x + beta*y — confirm against the gemv wrapper).
// NOTE(review): the declaration line carrying the return type (listing line
// 4381) is not present in this listing; confirm against the original header.
4377  template< typename VT1 // Type of the left-hand side target vector
4378  , typename MT1 // Type of the left-hand side matrix operand
4379  , typename VT2 // Type of the right-hand side vector operand
4380  , typename ST2 > // Type of the scalar value
4382  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4383  {
4384  using ET = ElementType_<VT1>;
4385 
4386  if( IsTriangular<MT1>::value ) {
4387  ResultType_<VT1> tmp( serial( scalar * x ) );
4388  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4389  subAssign( y, tmp );
4390  }
4391  else {
4392  gemv( y, A, x, ET(-scalar), ET(1) );
4393  }
4394  }
4395 #endif
4396  //**********************************************************************************************
4397 
4398  //**Subtraction assignment to sparse vectors****************************************************
4399  // No special implementation for the subtraction assignment to sparse vectors.
4400  //**********************************************************************************************
4401 
4402  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled expression to a dense vector.
//
//   lhs - target dense vector
//   rhs - scaled multiplication expression to be multiplied into the target
//
// The expression is first evaluated into a temporary of type ResultType via
// serial( rhs ); that temporary, not the expression itself, is then passed to
// multAssign() together with ~lhs.
//
// NOTE(review): listing lines 4417 and 4419-4421 are absent from this extract (only their
// neighbours 4418 and 4422 remain as empty lines) -- confirm against the original header.
4414  template< typename VT1 > // Type of the target dense vector
4415  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4416  {
4418 
4422 
// Internal consistency check: target and expression must have the same size.
4423  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4424 
4425  const ResultType tmp( serial( rhs ) );
4426  multAssign( ~lhs, tmp );
4427  }
4428  //**********************************************************************************************
4429 
4430  //**Multiplication assignment to sparse vectors*************************************************
4431  // No special implementation for the multiplication assignment to sparse vectors.
4432  //**********************************************************************************************
4433 
4434  //**Division assignment to dense vectors********************************************************
// Division assignment of the scaled expression to a dense vector.
//
//   lhs - target dense vector
//   rhs - scaled multiplication expression acting as the divisor
//
// The expression is first evaluated into a temporary of type ResultType via
// serial( rhs ); that temporary, not the expression itself, is then passed to
// divAssign() together with ~lhs.
//
// NOTE(review): listing lines 4449 and 4451-4453 are absent from this extract (only their
// neighbours 4450 and 4454 remain as empty lines) -- confirm against the original header.
4446  template< typename VT1 > // Type of the target dense vector
4447  friend inline void divAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4448  {
4450 
4454 
// Internal consistency check: target and expression must have the same size.
4455  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4456 
4457  const ResultType tmp( serial( rhs ) );
4458  divAssign( ~lhs, tmp );
4459  }
4460  //**********************************************************************************************
4461 
4462  //**Division assignment to sparse vectors*******************************************************
4463  // No special implementation for the division assignment to sparse vectors.
4464  //**********************************************************************************************
4465 
4466  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of the scaled matrix/vector product to a dense vector (per the section
// banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// NOTE(review): listing line 4482, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` (target) and
// `rhs` (scaled expression). Listing line 4484 is missing as well.
//
// Flow:
//   1. Fetch the matrix and vector operands of the wrapped product rhs.vector_.
//   2. A matrix without rows leaves the target untouched; a matrix without columns
//      resets the target.
//   3. Evaluate both operands into A (type LT) and x (type RT).
//   4. Hand the rebuilt expression A * x * rhs.scalar_ to smpAssign().
4480  template< typename VT1 > // Type of the target dense vector
4481  friend inline EnableIf_< UseSMPAssign<VT1> >
4483  {
4485 
4486  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4487 
4488  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4489  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4490 
// Degenerate shapes: no rows -> nothing to assign; no columns -> the target is reset.
4491  if( left.rows() == 0UL ) {
4492  return;
4493  }
4494  else if( left.columns() == 0UL ) {
4495  reset( ~lhs );
4496  return;
4497  }
4498 
4499  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4500  RT x( right ); // Evaluation of the right-hand side dense vector operand
4501 
// The evaluated operands must keep the dimensions of the originals and match the target.
4502  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4503  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4504  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4505  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4506 
4507  smpAssign( ~lhs, A * x * rhs.scalar_ );
4508  }
4509  //**********************************************************************************************
4510 
4511  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the scaled matrix/vector product to a sparse vector (per the section
// banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// The expression rhs is evaluated into a temporary of type ResultType (constructed
// directly from rhs, without a serial() wrapper), and that temporary is passed to
// smpAssign() together with ~lhs.
//
// NOTE(review): listing line 4527, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` and `rhs`.
// Listing lines 4529 and 4531-4533 are missing as well.
4525  template< typename VT1 > // Type of the target sparse vector
4526  friend inline EnableIf_< UseSMPAssign<VT1> >
4528  {
4530 
4534 
// Internal consistency check: target and expression must have the same size.
4535  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4536 
4537  const ResultType tmp( rhs );
4538  smpAssign( ~lhs, tmp );
4539  }
4540  //**********************************************************************************************
4541 
4542  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the scaled matrix/vector product to a dense vector (per the
// section banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// NOTE(review): listing line 4558, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` (target) and
// `rhs` (scaled expression). Listing line 4560 is missing as well.
//
// Flow:
//   1. Fetch the matrix and vector operands of the wrapped product rhs.vector_.
//   2. Return without touching the target if the matrix has no rows or no columns.
//   3. Evaluate both operands into A (type LT) and x (type RT).
//   4. Hand the rebuilt expression A * x * rhs.scalar_ to smpAddAssign().
4556  template< typename VT1 > // Type of the target dense vector
4557  friend inline EnableIf_< UseSMPAssign<VT1> >
4559  {
4561 
4562  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4563 
4564  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4565  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4566 
// Unlike plain assignment, an empty matrix needs no reset here: the target is left as is.
4567  if( left.rows() == 0UL || left.columns() == 0UL ) {
4568  return;
4569  }
4570 
4571  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4572  RT x( right ); // Evaluation of the right-hand side dense vector operand
4573 
// The evaluated operands must keep the dimensions of the originals and match the target.
4574  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4575  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4576  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4577  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4578 
4579  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
4580  }
4581  //**********************************************************************************************
4582 
4583  //**SMP addition assignment to sparse vectors***************************************************
4584  // No special implementation for the SMP addition assignment to sparse vectors.
4585  //**********************************************************************************************
4586 
4587  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the scaled matrix/vector product to a dense vector (per
// the section banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// NOTE(review): listing line 4603, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` (target) and
// `rhs` (scaled expression). Listing line 4605 is missing as well.
//
// Flow:
//   1. Fetch the matrix and vector operands of the wrapped product rhs.vector_.
//   2. Return without touching the target if the matrix has no rows or no columns.
//   3. Evaluate both operands into A (type LT) and x (type RT).
//   4. Hand the rebuilt expression A * x * rhs.scalar_ to smpSubAssign().
4601  template< typename VT1 > // Type of the target dense vector
4602  friend inline EnableIf_< UseSMPAssign<VT1> >
4604  {
4606 
4607  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4608 
4609  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4610  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4611 
// Unlike plain assignment, an empty matrix needs no reset here: the target is left as is.
4612  if( left.rows() == 0UL || left.columns() == 0UL ) {
4613  return;
4614  }
4615 
4616  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4617  RT x( right ); // Evaluation of the right-hand side dense vector operand
4618 
// The evaluated operands must keep the dimensions of the originals and match the target.
4619  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4620  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4621  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4622  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4623 
4624  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
4625  }
4626  //**********************************************************************************************
4627 
4628  //**SMP subtraction assignment to sparse vectors************************************************
4629  // No special implementation for the SMP subtraction assignment to sparse vectors.
4630  //**********************************************************************************************
4631 
4632  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the scaled matrix/vector product to a dense vector
// (per the section banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// The expression rhs is evaluated into a temporary of type ResultType (constructed
// directly from rhs, without a serial() wrapper), and that temporary is passed to
// smpMultAssign() together with ~lhs.
//
// NOTE(review): listing line 4649, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` and `rhs`.
// Listing lines 4651 and 4653-4655 are missing as well.
4647  template< typename VT1 > // Type of the target dense vector
4648  friend inline EnableIf_< UseSMPAssign<VT1> >
4650  {
4652 
4656 
// Internal consistency check: target and expression must have the same size.
4657  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4658 
4659  const ResultType tmp( rhs );
4660  smpMultAssign( ~lhs, tmp );
4661  }
4662  //**********************************************************************************************
4663 
4664  //**SMP multiplication assignment to sparse vectors*********************************************
4665  // No special implementation for the SMP multiplication assignment to sparse vectors.
4666  //**********************************************************************************************
4667 
4668  //**SMP division assignment to dense vectors****************************************************
// SMP division assignment of the scaled matrix/vector product to a dense vector (per the
// section banner). The overload is enabled only when UseSMPAssign<VT1> holds.
//
// The expression rhs is evaluated into a temporary of type ResultType (constructed
// directly from rhs, without a serial() wrapper), and that temporary is passed to
// smpDivAssign() together with ~lhs.
//
// NOTE(review): listing line 4684, which carries the function name and parameter list, is
// missing from this extract; the body refers to its parameters as `lhs` and `rhs`.
// Listing lines 4686 and 4688-4690 are missing as well.
4682  template< typename VT1 > // Type of the target dense vector
4683  friend inline EnableIf_< UseSMPAssign<VT1> >
4685  {
4687 
4691 
// Internal consistency check: target and expression must have the same size.
4692  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4693 
4694  const ResultType tmp( rhs );
4695  smpDivAssign( ~lhs, tmp );
4696  }
4697  //**********************************************************************************************
4698 
4699  //**SMP division assignment to sparse vectors***************************************************
4700  // No special implementation for the SMP division assignment to sparse vectors.
4701  //**********************************************************************************************
4702 
4703  //**Compile time checks*************************************************************************
4712  //**********************************************************************************************
4713 };
4715 //*************************************************************************************************
4716 
4717 
4718 
4719 
4720 //=================================================================================================
4721 //
4722 // GLOBAL BINARY ARITHMETIC OPERATORS
4723 //
4724 //=================================================================================================
4725 
4726 //*************************************************************************************************
// Multiplication operator for a dense matrix with storage-order flag `true` and a dense
// vector with transpose flag `false`.
//
//   mat - left-hand side dense matrix
//   vec - right-hand side dense vector
//
// Returns a const TDMatDVecMultExpr<MT,VT> built from the two operands; the expression
// object is returned as is, no product is evaluated inside this function.
//
// Raises an invalid-argument error through BLAZE_THROW_INVALID_ARGUMENT when the number
// of matrix columns differs from the vector size.
//
// NOTE(review): listing lines 4762 and 4764 are absent from this extract -- confirm
// against the original header.
4757 template< typename MT // Type of the left-hand side dense matrix
4758  , typename VT > // Type of the right-hand side dense vector
4759 inline decltype(auto)
4760  operator*( const DenseMatrix<MT,true>& mat, const DenseVector<VT,false>& vec )
4761 {
4763 
4765 
// Run-time shape check: the inner dimensions of the product must agree.
4766  if( (~mat).columns() != (~vec).size() ) {
4767  BLAZE_THROW_INVALID_ARGUMENT( "Matrix and vector sizes do not match" );
4768  }
4769 
4770  using ReturnType = const TDMatDVecMultExpr<MT,VT>;
4771  return ReturnType( ~mat, ~vec );
4772 }
4773 //*************************************************************************************************
4774 
4775 
4776 
4777 
4778 //=================================================================================================
4779 //
4780 // SIZE SPECIALIZATIONS
4781 //
4782 //=================================================================================================
4783 
4784 //*************************************************************************************************
// Size trait specialization for TDMatDVecMultExpr: the size of the expression along
// dimension 0 is taken over unchanged from Size<MT,0UL>, i.e. from the matrix operand.
// The vector operand VT does not take part in the computation.
4786 template< typename MT, typename VT >
4787 struct Size< TDMatDVecMultExpr<MT,VT>, 0UL >
4788  : public Size<MT,0UL>
4789 {};
4791 //*************************************************************************************************
4792 
4793 
4794 
4795 
4796 //=================================================================================================
4797 //
4798 // ISALIGNED SPECIALIZATIONS
4799 //
4800 //=================================================================================================
4801 
4802 //*************************************************************************************************
// IsAligned trait specialization for TDMatDVecMultExpr: the expression counts as aligned
// only if both the matrix operand type MT and the vector operand type VT are aligned
// (compile-time logical conjunction of the two operand traits).
4804 template< typename MT, typename VT >
4805 struct IsAligned< TDMatDVecMultExpr<MT,VT> >
4806  : public And< IsAligned<MT>, IsAligned<VT> >
4807 {};
4809 //*************************************************************************************************
4810 
4811 } // namespace blaze
4812 
4813 #endif
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:329
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Data type constraint.
IfTrue_< evaluateMatrix, const MRT, MCT > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:220
Headerfile for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:71
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:210
Compile time check for low-level access to constant data.This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDMatDVecMultExpr.h:297
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:86
Header file for basic type definitions.
ElementType_< VRT > VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:132
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a column dense or sparse vector type...
Definition: ColumnVector.h:61
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
Header file for the serial shim.
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:364
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:588
ResultType_< MT > MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:129
CompositeType_< VT > VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:134
constexpr Unchecked unchecked
Global Unchecked instance.The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template.The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:224
size_t size() const noexcept
Returns the current size/dimension of the vector.
Definition: DVecScalarMultExpr.h:519
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:87
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:291
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:140
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:87
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the VecScalarMultExpr base class.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1950
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:80
Expression object for transpose dense matrix-dense vector multiplications.The TDMatDVecMultExpr class...
Definition: Forward.h:149
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
MultTrait_< MRT, VRT > ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:206
Constraint on the transpose flag of vector types.
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:71
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for the memory layout of data types.This type trait tests whether the given data t...
Definition: IsContiguous.h:86
Headerfile for the generic max algorithm.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the IsDouble type trait.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:374
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:364
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:211
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:263
Header file for the HasSIMDAdd type trait.
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:342
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:320
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:89
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:506
CompositeType_< MT > MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:133
#define BLAZE_CONSTRAINT_MUST_NOT_BE_MATMATMULTEXPR_TYPE(T)
Constraint on the data type.In case the given data type T is a matrix/matrix multiplication expressio...
Definition: MatMatMultExpr.h:87
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:79
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:108
ResultType_< VT > VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:130
Constraint on the data type.
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatDVecMultExpr.h:209
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Constraint on the data type.
LeftOperand vector_
Left-hand side dense vector of the multiplication expression.
Definition: DVecScalarMultExpr.h:590
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the IsPadded type trait.
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions.The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DVecScalarMultExpr.h:591
If_< IsExpression< MT >, const MT, const MT &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:214
System settings for the BLAS mode.
Base class for all vector/scalar multiplication expression templates.The VecScalarMultExpr class serv...
Definition: VecScalarMultExpr.h:67
ElementType_< ResultType > ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:208
Header file for the IsSIMDCombinable type trait.
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
IfTrue_< evaluateVector, const VRT, VCT > RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:223
Header file for the IsContiguous type trait.
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:154
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
size_t size() const noexcept
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:310
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:207
Compile time check for built-in data types.This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
ElementType_< MRT > MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:131
Constraints on the storage order of matrix types.
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:816
#define BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
Compilation switch for the BLAS matrix/vector multiplication kernels (gemv).This compilation switch e...
Definition: BLAS.h:93
Header file for the HasMutableDataAccess type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemv).This compilation switch e...
Definition: BLAS.h:152
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template.The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
Header file for BLAS general matrix/vector multiplication functions (gemv)
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3080
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:61
Compile time check for complex types.This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:64
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Expression object for dense vector-scalar multiplications.The DVecScalarMultExpr class represents the...
Definition: DVecScalarMultExpr.h:104
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:66
Compile time evaluation of the size of vectors and matrices.The Size type trait evaluates the size of...
Definition: Size.h:80
RightOperand rightOperand() const noexcept
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:330
Base class for sparse vectors.The SparseVector class is a base class for all arbitrarily sized (N-dim...
Definition: Forward.h:130
Header file for the IsComplexFloat type trait.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:354
Header file for the IsComplex type trait.
Compile time logical 'and' evaluation. The And alias declaration performs at compile time a logical 'a...
Definition: And.h:76
Constraint on the data type.
Header file for the complex data type.
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions.The RightOperand_ alias declaration provid...
Definition: Aliases.h:383
If_< IsExpression< VT >, const VT, const VT &> RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:217
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions.The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:386
Header file for the MatVecMultExpr base class.
Constraint on the data type.
TDMatDVecMultExpr(const MT &mat, const VT &vec) noexcept
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:249
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:387
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the function trace functionality.