TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemv.h>
44 #include <blaze/math/blas/trmv.h>
45 #include <blaze/math/Aliases.h>
53 #include <blaze/math/Exception.h>
59 #include <blaze/math/shims/Reset.h>
61 #include <blaze/math/SIMD.h>
81 #include <blaze/system/BLAS.h>
86 #include <blaze/util/Assert.h>
87 #include <blaze/util/Complex.h>
89 #include <blaze/util/DisableIf.h>
90 #include <blaze/util/EnableIf.h>
93 #include <blaze/util/mpl/And.h>
94 #include <blaze/util/mpl/If.h>
95 #include <blaze/util/Types.h>
103 
104 
105 namespace blaze {
106 
107 //=================================================================================================
108 //
109 // CLASS TDMATDVECMULTEXPR
110 //
111 //=================================================================================================
112 
113 //*************************************************************************************************
120 template< typename MT // Type of the left-hand side dense matrix
121  , typename VT > // Type of the right-hand side dense vector
122 class TDMatDVecMultExpr
123  : public MatVecMultExpr< DenseVector< TDMatDVecMultExpr<MT,VT>, false > >
124  , private Computation
125 {
126  private:
127  //**Type definitions****************************************************************************
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
// Compile-time switch for the left-hand side matrix operand. The visible part of the
// condition requires MT to be a computation expression and MET and VET to be the same type.
// NOTE(review): the remainder of the condition is not part of this listing.
138  enum : bool { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
// Compile-time switch for the right-hand side vector operand: set if VT is a computation
// expression or if RequiresEvaluation<VT> is true for it.
144  enum : bool { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
145  //**********************************************************************************************
146 
147  //**********************************************************************************************
149 
153  template< typename T1 >
154  struct UseSMPAssign {
155  enum : bool { value = ( evaluateMatrix || evaluateVector ) };
156  };
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
// Helper structure over three types T1, T2 and T3. The visible conditions require all three
// types to have their simdEnabled flag set and T1 and T3 to have the same element type.
// NOTE(review): the opening of the enum and several further conditions are missing from this
// listing, so the complete selection rule cannot be read from here.
165  template< typename T1, typename T2, typename T3 >
166  struct UseBlasKernel {
172  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
177  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
178  };
180  //**********************************************************************************************
181 
182  //**********************************************************************************************
184 
// Helper structure over three types T1, T2 and T3. The visible conditions require
// useOptimizedKernels to be set and all three types to have their simdEnabled flag set.
// NOTE(review): several lines of the condition (including the end of the enum) are missing
// from this listing, so the complete selection rule cannot be read from here.
188  template< typename T1, typename T2, typename T3 >
189  struct UseVectorizedDefaultKernel {
190  enum : bool { value = useOptimizedKernels &&
192  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
195  , ElementType_<T3> >::value &&
198  };
200  //**********************************************************************************************
201 
202  public:
203  //**Type definitions****************************************************************************
// Type returned by operator[]() and at(): a const element.
209  using ReturnType = const ElementType;
// Alias for a const ResultType.
210  using CompositeType = const ResultType;
211 
// Type of the stored left-hand side operand: chosen via If_ between 'const MT' and
// 'const MT&' depending on IsExpression<MT>.
213  using LeftOperand = If_< IsExpression<MT>, const MT, const MT& >;
214 
// Type of the stored right-hand side operand: chosen via If_ between 'const VT' and
// 'const VT&' depending on IsExpression<VT>.
216  using RightOperand = If_< IsExpression<VT>, const VT, const VT& >;
217 
220 
223  //**********************************************************************************************
224 
225  //**Compilation flags***************************************************************************
// Compilation flag for SIMD support. The visible part requires a non-diagonal matrix type
// and both operand types to be SIMD-enabled.
// NOTE(review): the end of this condition is missing from the listing.
227  enum : bool { simdEnabled = !IsDiagonal<MT>::value &&
228  MT::simdEnabled && VT::simdEnabled &&
231 
// Compilation flag for SMP assignment: set only if neither operand needs to be evaluated
// (evaluateMatrix / evaluateVector are both false) and both operand types are SMP-assignable.
233  enum : bool { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
234  !evaluateVector && VT::smpAssignable };
235  //**********************************************************************************************
236 
237  //**SIMD properties*****************************************************************************
// SIMD width constant taken from SIMDTrait<ElementType>::size; the vectorized kernels use it
// as the row step of their SIMD loads and stores.
239  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
240  //**********************************************************************************************
241 
242  //**Constructor*********************************************************************************
248  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec ) noexcept
249  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
250  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
251  {
252  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
253  }
254  //**********************************************************************************************
255 
256  //**Subscript operator**************************************************************************
// Subscript operator: computes the element of the product vector at position 'index' on the
// fly as a product of (part of) matrix row 'index' and the vector operand. The index is only
// checked by an internal assertion against the number of matrix rows.
262  inline ReturnType operator[]( size_t index ) const {
263  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
264 
    // NOTE(review): the condition that opens this first branch is missing from the listing.
    // The branch itself multiplies the diagonal element with the matching vector element.
266  {
267  return mat_(index,index) * vec_[index];
268  }
    // Lower matrix types, taken only while index+8 is still below the row count: the product
    // is restricted to the first n columns (the diagonal is excluded for strictly lower types).
269  else if( IsLower<MT>::value && ( index + 8UL < mat_.rows() ) )
270  {
271  const size_t n( IsStrictlyLower<MT>::value ? index : index+1UL );
272  return subvector( row( mat_, index ), 0UL, n ) * subvector( vec_, 0UL, n );
273  }
    // Upper matrix types, taken only for index > 8: the product is restricted to the columns
    // from 'begin' to the last column (the diagonal is excluded for strictly upper types).
274  else if( IsUpper<MT>::value && ( index > 8UL ) )
275  {
276  const size_t begin( IsStrictlyUpper<MT>::value ? index+1UL : index );
277  const size_t n ( mat_.columns() - begin );
278  return subvector( row( mat_, index ), begin, n ) * subvector( vec_, begin, n );
279  }
    // Fallback: product of the complete matrix row and the complete vector.
280  else
281  {
282  return row( mat_, index ) * vec_;
283  }
284  }
285  //**********************************************************************************************
286 
287  //**At function*********************************************************************************
294  inline ReturnType at( size_t index ) const {
295  if( index >= mat_.rows() ) {
296  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
297  }
298  return (*this)[index];
299  }
300  //**********************************************************************************************
301 
302  //**Size function*******************************************************************************
307  inline size_t size() const noexcept {
308  return mat_.rows();
309  }
310  //**********************************************************************************************
311 
312  //**Left operand access*************************************************************************
317  inline LeftOperand leftOperand() const noexcept {
318  return mat_;
319  }
320  //**********************************************************************************************
321 
322  //**Right operand access************************************************************************
327  inline RightOperand rightOperand() const noexcept {
328  return vec_;
329  }
330  //**********************************************************************************************
331 
332  //**********************************************************************************************
338  template< typename T >
339  inline bool canAlias( const T* alias ) const noexcept {
340  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
341  }
342  //**********************************************************************************************
343 
344  //**********************************************************************************************
350  template< typename T >
351  inline bool isAliased( const T* alias ) const noexcept {
352  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
353  }
354  //**********************************************************************************************
355 
356  //**********************************************************************************************
361  inline bool isAligned() const noexcept {
362  return mat_.isAligned() && vec_.isAligned();
363  }
364  //**********************************************************************************************
365 
366  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments. The result requires the
// size of the expression to exceed SMP_TDMATDVECMULT_THRESHOLD and, in addition, at least one
// of the alternatives listed in the first parenthesis to hold.
// NOTE(review): two lines of that first condition are missing from this listing.
371  inline bool canSMPAssign() const noexcept {
372  return ( !BLAZE_BLAS_MODE ||
375  ( IsComputation<MT>::value && !evaluateMatrix ) ||
376  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
377  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
378  }
379  //**********************************************************************************************
380 
381  private:
382  //**Member variables****************************************************************************
385  //**********************************************************************************************
386 
387  //**Assignment to dense vectors*****************************************************************
// Assignment of a transpose dense matrix / dense vector multiplication to a dense vector
// (lhs = A * x). Handles the degenerate sizes, evaluates both operands serially and forwards
// to the kernel selection.
// NOTE(review): one line at the top of the body is missing from this listing.
400  template< typename VT1 > // Type of the target dense vector
401  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
402  {
404 
405  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
406 
    // No rows: nothing to do. No columns: the target vector is reset.
407  if( rhs.mat_.rows() == 0UL ) {
408  return;
409  }
410  else if( rhs.mat_.columns() == 0UL ) {
411  reset( ~lhs );
412  return;
413  }
414 
415  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
416  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
417 
    // The evaluated operands must still have the dimensions of the original operands
418  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
419  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
420  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
421  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
422 
423  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
424  }
426  //**********************************************************************************************
427 
428  //**Assignment to dense vectors (kernel selection)**********************************************
439  template< typename VT1 // Type of the left-hand side target vector
440  , typename MT1 // Type of the left-hand side matrix operand
441  , typename VT2 > // Type of the right-hand side vector operand
442  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
443  {
444  if( ( IsDiagonal<MT1>::value ) ||
445  ( IsComputation<MT>::value && !evaluateMatrix ) ||
446  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
447  selectSmallAssignKernel( y, A, x );
448  else
449  selectBlasAssignKernel( y, A, x );
450  }
452  //**********************************************************************************************
453 
454  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for y = A * x. The result is built column by column:
// column 0 initializes y, every further column j is accumulated into y, with the traversed
// row range narrowed for lower and upper matrix types.
// NOTE(review): three condition lines are missing from this listing; the affected places
// are marked below.
468  template< typename VT1 // Type of the left-hand side target vector
469  , typename MT1 // Type of the left-hand side matrix operand
470  , typename VT2 > // Type of the right-hand side vector operand
471  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
472  {
473  const size_t M( A.rows() );
474  const size_t N( A.columns() );
475 
    // NOTE(review): the condition guarding this reset of the first element is not visible.
477  reset( y[0] );
478  }
479 
    // Non-upper matrices: initialize y from the first column (row 0 is skipped for strictly
    // lower matrices).
480  if( !IsUpper<MT1>::value )
481  {
482  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
483  y[i] = A(i,0UL) * x[0UL];
484  }
485  }
486 
    // Remaining columns; the loop starts at column 0 only for upper, non-strictly-upper types.
487  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
488  {
    // NOTE(review): the condition opening this branch is not visible; the branch handles a
    // single diagonal element per column.
490  {
491  y[j] = A(j,j) * x[j];
492  }
493  else
494  {
    // Row range [ibegin,iend) of column j that is accumulated into y
495  const size_t ibegin( ( IsLower<MT1>::value )
496  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
497  :( 0UL ) );
498  const size_t iend( ( IsUpper<MT1>::value )
499  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
500  :( M ) );
501  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
502 
    // ipos: end of the part of the range that can be processed two rows at a time
503  const size_t inum( iend - ibegin );
504  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
505 
506  for( size_t i=ibegin; i<ipos; i+=2UL ) {
507  y[i ] += A(i ,j) * x[j];
508  y[i+1UL] += A(i+1UL,j) * x[j];
509  }
    // Odd leftover row
510  if( ipos < iend ) {
511  y[ipos] += A(ipos,j) * x[j];
512  }
    // Upper matrices: element iend has not been written before and is assigned, not added
513  if( IsUpper<MT1>::value ) {
514  y[iend] = A(iend,j) * x[j];
515  }
516  }
517  }
518 
    // NOTE(review): the condition guarding this reset of the last element is not visible.
520  reset( y[M-1UL] );
521  }
522  }
524  //**********************************************************************************************
525 
526  //**Default assignment to dense vectors (small matrices)****************************************
// Small-matrix assignment kernel, non-vectorized variant: simply forwards to the default
// assignment kernel.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
540  template< typename VT1 // Type of the left-hand side target vector
541  , typename MT1 // Type of the left-hand side matrix operand
542  , typename VT2 > // Type of the right-hand side vector operand
544  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
545  {
546  selectDefaultAssignKernel( y, A, x );
547  }
549  //**********************************************************************************************
550 
551  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Small-matrix assignment kernel, vectorized variant for y = A * x. The rows of A are
// processed in groups of 8, 4, 3, 2 and 1 SIMD vectors; for each group the contributions of
// the relevant columns are accumulated in SIMD registers and then stored to y. Rows that do
// not fill a whole SIMD vector are handled by a scalar loop at the end.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
// NOTE(review): the SIMD accumulators are default constructed and then used with '+='; this
// presumably relies on SIMDType being zero-initialized by its default constructor - confirm.
565  template< typename VT1 // Type of the left-hand side target vector
566  , typename MT1 // Type of the left-hand side matrix operand
567  , typename VT2 > // Type of the right-hand side vector operand
569  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
570  {
    // A scalar remainder loop is needed unless both the matrix and the target are padded
571  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
572 
573  const size_t M( A.rows() );
574  const size_t N( A.columns() );
575 
    // ipos: end of the row range covered by SIMD operations (M rounded down to a multiple
    // of SIMDSIZE if a remainder loop is required, see the assertion below)
576  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
577  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
578 
579  size_t i( 0UL );
580 
    // Groups of 8 SIMD vectors of rows
581  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
582  {
    // Column range [jbegin,jend) visited for this group of rows; narrowed for upper and
    // lower matrix types
583  const size_t jbegin( ( IsUpper<MT1>::value )
584  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
585  :( 0UL ) );
586  const size_t jend( ( IsLower<MT1>::value )
587  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
588  :( N ) );
589  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
590 
591  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
592 
    // x[j] is broadcast via set() and multiplied with eight SIMD vectors of column j
593  for( size_t j=jbegin; j<jend; ++j ) {
594  const SIMDType x1( set( x[j] ) );
595  xmm1 += A.load(i ,j) * x1;
596  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
597  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
598  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
599  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
600  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
601  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
602  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
603  }
604 
605  y.store( i , xmm1 );
606  y.store( i+SIMDSIZE , xmm2 );
607  y.store( i+SIMDSIZE*2UL, xmm3 );
608  y.store( i+SIMDSIZE*3UL, xmm4 );
609  y.store( i+SIMDSIZE*4UL, xmm5 );
610  y.store( i+SIMDSIZE*5UL, xmm6 );
611  y.store( i+SIMDSIZE*6UL, xmm7 );
612  y.store( i+SIMDSIZE*7UL, xmm8 );
613  }
614 
    // Groups of 4 SIMD vectors of rows
615  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
616  {
617  const size_t jbegin( ( IsUpper<MT1>::value )
618  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
619  :( 0UL ) );
620  const size_t jend( ( IsLower<MT1>::value )
621  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
622  :( N ) );
623  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
624 
625  SIMDType xmm1, xmm2, xmm3, xmm4;
626 
627  for( size_t j=jbegin; j<jend; ++j ) {
628  const SIMDType x1( set( x[j] ) );
629  xmm1 += A.load(i ,j) * x1;
630  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
631  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
632  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
633  }
634 
635  y.store( i , xmm1 );
636  y.store( i+SIMDSIZE , xmm2 );
637  y.store( i+SIMDSIZE*2UL, xmm3 );
638  y.store( i+SIMDSIZE*3UL, xmm4 );
639  }
640 
    // Groups of 3 SIMD vectors of rows
641  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
642  {
643  const size_t jbegin( ( IsUpper<MT1>::value )
644  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
645  :( 0UL ) );
646  const size_t jend( ( IsLower<MT1>::value )
647  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
648  :( N ) );
649  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
650 
651  SIMDType xmm1, xmm2, xmm3;
652 
653  for( size_t j=jbegin; j<jend; ++j ) {
654  const SIMDType x1( set( x[j] ) );
655  xmm1 += A.load(i ,j) * x1;
656  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
657  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
658  }
659 
660  y.store( i , xmm1 );
661  y.store( i+SIMDSIZE , xmm2 );
662  y.store( i+SIMDSIZE*2UL, xmm3 );
663  }
664 
    // Groups of 2 SIMD vectors of rows
665  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
666  {
667  const size_t jbegin( ( IsUpper<MT1>::value )
668  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
669  :( 0UL ) );
670  const size_t jend( ( IsLower<MT1>::value )
671  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
672  :( N ) );
673  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
674 
675  SIMDType xmm1, xmm2;
676 
677  for( size_t j=jbegin; j<jend; ++j ) {
678  const SIMDType x1( set( x[j] ) );
679  xmm1 += A.load(i ,j) * x1;
680  xmm2 += A.load(i+SIMDSIZE,j) * x1;
681  }
682 
683  y.store( i , xmm1 );
684  y.store( i+SIMDSIZE, xmm2 );
685  }
686 
    // Single SIMD vectors of rows
687  for( ; i<ipos; i+=SIMDSIZE )
688  {
689  const size_t jbegin( ( IsUpper<MT1>::value )
690  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
691  :( 0UL ) );
692  const size_t jend( ( IsLower<MT1>::value )
693  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
694  :( N ) );
695  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
696 
697  SIMDType xmm1;
698 
699  for( size_t j=jbegin; j<jend; ++j ) {
700  xmm1 += A.load(i,j) * set( x[j] );
701  }
702 
703  y.store( i, xmm1 );
704  }
705 
    // Scalar handling of the rows beyond ipos (only if a remainder is possible)
706  for( ; remainder && i<M; ++i )
707  {
708  const size_t jbegin( ( IsUpper<MT1>::value )
709  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
710  :( 0UL ) );
711  const size_t jend( ( IsLower<MT1>::value )
712  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
713  :( N ) );
714  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
715 
716  ElementType value = ElementType();
717 
718  for( size_t j=jbegin; j<jend; ++j ) {
719  value += A(i,j) * x[j];
720  }
721 
722  y[i] = value;
723  }
724  }
726  //**********************************************************************************************
727 
728  //**Default assignment to dense vectors (large matrices)****************************************
// Large-matrix assignment kernel, non-vectorized variant: simply forwards to the default
// assignment kernel.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
742  template< typename VT1 // Type of the left-hand side target vector
743  , typename MT1 // Type of the left-hand side matrix operand
744  , typename VT2 > // Type of the right-hand side vector operand
746  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
747  {
748  selectDefaultAssignKernel( y, A, x );
749  }
751  //**********************************************************************************************
752 
753  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Large-matrix assignment kernel, vectorized variant for y = A * x. The target is reset
// first; A is then traversed in blocks of iblock rows and jblock columns, and the partial
// products of each block are added to y (load, add, store). Within a block the rows are
// processed in groups of 8, 4, 3, 2 and 1 SIMD vectors, followed by a scalar remainder loop.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
// NOTE(review): the SIMD accumulators are default constructed and then used with '+='; this
// presumably relies on SIMDType being zero-initialized by its default constructor - confirm.
767  template< typename VT1 // Type of the left-hand side target vector
768  , typename MT1 // Type of the left-hand side matrix operand
769  , typename VT2 > // Type of the right-hand side vector operand
771  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
772  {
    // A scalar remainder loop is needed unless both the matrix and the target are padded
773  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
774 
775  const size_t M( A.rows() );
776  const size_t N( A.columns() );
777 
    // Row block: 32768 bytes worth of elements; column block: 8 columns if N < iblock,
    // otherwise 4
778  const size_t iblock( 32768UL / sizeof( ElementType ) );
779  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
780 
781  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
782 
    // All blocks below accumulate into y, so it is cleared up front
783  reset( y );
784 
785  for( size_t ii=0U; ii<M; ii+=iblock ) {
786  for( size_t jj=0UL; jj<N; jj+=jblock )
787  {
    // Column range [jj,jend) and row end of the current block; for upper matrix types the
    // row end is additionally limited by the column range
788  const size_t jend( min( jj+jblock, N ) );
789  const size_t itmp( min( ii+iblock, M ) );
790  const size_t iend( ( IsUpper<MT1>::value )
791  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
792  :( itmp ) );
793 
    // ipos: end of the row range covered by SIMD operations within this block
794  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
795  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
796 
    // First row of the block; for lower matrix types the start is derived from the first
    // column of the block, rounded down to a multiple of SIMDSIZE
797  size_t i( ( IsLower<MT1>::value )
798  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
799  :( ii ) );
800 
    // Groups of 8 SIMD vectors of rows
801  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
802  {
803  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
804 
805  for( size_t j=jj; j<jend; ++j ) {
806  const SIMDType x1( set( x[j] ) );
807  xmm1 += A.load(i ,j) * x1;
808  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
809  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
810  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
811  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
812  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
813  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
814  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
815  }
816 
    // Add the block contribution to the values already stored in y
817  y.store( i , y.load(i ) + xmm1 );
818  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
819  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
820  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
821  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5 );
822  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6 );
823  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7 );
824  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8 );
825  }
826 
    // Groups of 4 SIMD vectors of rows
827  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
828  {
829  SIMDType xmm1, xmm2, xmm3, xmm4;
830 
831  for( size_t j=jj; j<jend; ++j ) {
832  const SIMDType x1( set( x[j] ) );
833  xmm1 += A.load(i ,j) * x1;
834  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
835  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
836  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
837  }
838 
839  y.store( i , y.load(i ) + xmm1 );
840  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
841  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
842  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
843  }
844 
    // Groups of 3 SIMD vectors of rows
845  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
846  {
847  SIMDType xmm1, xmm2, xmm3;
848 
849  for( size_t j=jj; j<jend; ++j ) {
850  const SIMDType x1( set( x[j] ) );
851  xmm1 += A.load(i ,j) * x1;
852  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
853  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
854  }
855 
856  y.store( i , y.load(i ) + xmm1 );
857  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
858  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
859  }
860 
    // Groups of 2 SIMD vectors of rows
861  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
862  {
863  SIMDType xmm1, xmm2;
864 
865  for( size_t j=jj; j<jend; ++j ) {
866  const SIMDType x1( set( x[j] ) );
867  xmm1 += A.load(i ,j) * x1;
868  xmm2 += A.load(i+SIMDSIZE,j) * x1;
869  }
870 
871  y.store( i , y.load(i ) + xmm1 );
872  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2 );
873  }
874 
    // Single SIMD vectors of rows
875  for( ; i<ipos; i+=SIMDSIZE )
876  {
877  SIMDType xmm1;
878 
879  for( size_t j=jj; j<jend; ++j ) {
880  xmm1 += A.load(i,j) * set( x[j] );
881  }
882 
883  y.store( i, y.load(i) + xmm1 );
884  }
885 
    // Scalar handling of the rows between ipos and iend (only if a remainder is possible)
886  for( ; remainder && i<iend; ++i )
887  {
888  ElementType value = ElementType();
889 
890  for( size_t j=jj; j<jend; ++j ) {
891  value += A(i,j) * x[j];
892  }
893 
894  y[i] += value;
895  }
896  }
897  }
898  }
900  //**********************************************************************************************
901 
902  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel, fallback variant: forwards to the large-matrix assignment kernel.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
916  template< typename VT1 // Type of the left-hand side target vector
917  , typename MT1 // Type of the left-hand side matrix operand
918  , typename VT2 > // Type of the right-hand side vector operand
920  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
921  {
922  selectLargeAssignKernel( y, A, x );
923  }
925  //**********************************************************************************************
926 
927  //**BLAS-based assignment to dense vectors******************************************************
// BLAS-based assignment kernel for y = A * x; only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are enabled.
// NOTE(review): the line carrying the return type of this declaration and the condition
// opening the first branch are missing from this listing.
928 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
929 
942  template< typename VT1 // Type of the left-hand side target vector
943  , typename MT1 // Type of the left-hand side matrix operand
944  , typename VT2 > // Type of the right-hand side vector operand
946  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
947  {
948  using ET = ElementType_<VT1>;
949 
    // First branch: x is copied into y and trmv() is then applied to y with A, passing
    // CblasLower or CblasUpper depending on IsLower<MT1>
951  assign( y, x );
952  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
953  }
    // Otherwise: gemv() call with the scalar factors ET(1) and ET(0)
954  else {
955  gemv( y, A, x, ET(1), ET(0) );
956  }
957  }
959 #endif
960  //**********************************************************************************************
961 
962  //**Assignment to sparse vectors****************************************************************
// Assignment of a transpose dense matrix / dense vector multiplication to a sparse vector:
// the expression is first evaluated serially into a temporary of type ResultType, which is
// then assigned to the target.
// NOTE(review): several lines at the top of the body are missing from this listing.
975  template< typename VT1 > // Type of the target sparse vector
976  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
977  {
979 
983 
984  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
985 
986  const ResultType tmp( serial( rhs ) );
987  assign( ~lhs, tmp );
988  }
990  //**********************************************************************************************
991 
992  //**Addition assignment to dense vectors********************************************************
// Addition assignment of a transpose dense matrix / dense vector multiplication to a dense
// vector (lhs += A * x). Returns immediately for empty matrices, otherwise evaluates both
// operands serially and forwards to the kernel selection.
// NOTE(review): one line at the top of the body is missing from this listing.
1005  template< typename VT1 > // Type of the target dense vector
1006  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1007  {
1009 
1010  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1011 
    // A matrix without rows or without columns contributes nothing to the target
1012  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1013  return;
1014  }
1015 
1016  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1017  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1018 
    // The evaluated operands must still have the dimensions of the original operands
1019  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1020  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1021  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1022  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1023 
1024  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1025  }
1027  //**********************************************************************************************
1028 
1029  //**Addition assignment to dense vectors (kernel selection)*************************************
1040  template< typename VT1 // Type of the left-hand side target vector
1041  , typename MT1 // Type of the left-hand side matrix operand
1042  , typename VT2 > // Type of the right-hand side vector operand
1043  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1044  {
1045  if( ( IsDiagonal<MT1>::value ) ||
1046  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1047  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1048  selectSmallAddAssignKernel( y, A, x );
1049  else
1050  selectBlasAddAssignKernel( y, A, x );
1051  }
1053  //**********************************************************************************************
1054 
1055  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) kernel for y += A * x. The matrix is traversed column by column
// and the contribution of every column j is added to y, with the traversed row range
// narrowed for lower and upper matrix types.
// NOTE(review): the condition opening the first branch inside the loop is missing from
// this listing.
1069  template< typename VT1 // Type of the left-hand side target vector
1070  , typename MT1 // Type of the left-hand side matrix operand
1071  , typename VT2 > // Type of the right-hand side vector operand
1072  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1073  {
1074  const size_t M( A.rows() );
1075  const size_t N( A.columns() );
1076 
1077  for( size_t j=0UL; j<N; ++j )
1078  {
    // NOTE(review): branch condition not visible; the branch adds the contribution of a
    // single diagonal element per column.
1080  {
1081  y[j] += A(j,j) * x[j];
1082  }
1083  else
1084  {
    // Row range [ibegin,iend) of column j that is added to y
1085  const size_t ibegin( ( IsLower<MT1>::value )
1086  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1087  :( 0UL ) );
1088  const size_t iend( ( IsUpper<MT1>::value )
1089  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1090  :( M ) );
1091  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1092 
    // ipos: end of the part of the range that can be processed two rows at a time
1093  const size_t inum( iend - ibegin );
1094  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1095 
1096  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1097  y[i ] += A(i ,j) * x[j];
1098  y[i+1UL] += A(i+1UL,j) * x[j];
1099  }
    // Odd leftover row
1100  if( ipos < iend ) {
1101  y[ipos] += A(ipos,j) * x[j];
1102  }
1103  }
1104  }
1105  }
1107  //**********************************************************************************************
1108 
1109  //**Default addition assignment to dense vectors (small matrices)*******************************
// Small-matrix addition assignment kernel, non-vectorized variant: simply forwards to the
// default addition assignment kernel.
// NOTE(review): the line carrying the return type of this declaration is missing from this
// listing, so the condition under which this overload is selected cannot be read from here.
1123  template< typename VT1 // Type of the left-hand side target vector
1124  , typename MT1 // Type of the left-hand side matrix operand
1125  , typename VT2 > // Type of the right-hand side vector operand
1127  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1128  {
1129  selectDefaultAddAssignKernel( y, A, x );
1130  }
1132  //**********************************************************************************************
1133 
1134  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized kernel computing y += A * x for small matrices.
// Rows of y are processed in strips of 8, 4, 3, 2 and finally 1 SIMD pack(s): each strip loads
// the current y values, accumulates A.load(i,j) * x[j] (x[j] broadcast via set()) over the
// selected columns and stores the result back. A scalar loop handles the rows left over when
// the operands are not padded.
// Parameters: y - target vector (updated in place), A - matrix operand, x - vector operand.
// NOTE(review): listing line 1151 (presumably the return type / enable-if condition) is
// absent from this excerpt.
1148  template< typename VT1 // Type of the left-hand side target vector
1149  , typename MT1 // Type of the left-hand side matrix operand
1150  , typename VT2 > // Type of the right-hand side vector operand
1152  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1153  {
      // A scalar tail loop is needed unless both the matrix and the target vector are padded.
1154  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1155 
1156  const size_t M( A.rows() );
1157  const size_t N( A.columns() );
1158 
      // End (exclusive) of the rows covered by full SIMD packs: M rounded down to a multiple
      // of SIMDSIZE when a remainder loop is required, M itself otherwise.
1159  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
1160  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1161 
1162  size_t i( 0UL );
1163 
      // Strip of 8 SIMD packs per iteration.
1164  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1165  {
      // Column range restricted by the matrix structure: upper matrices start at column i
      // (i+1 if strictly upper); lower matrices stop at the end of the strip, clamped to N
      // (one column earlier if strictly lower).
1166  const size_t jbegin( ( IsUpper<MT1>::value )
1167  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1168  :( 0UL ) );
1169  const size_t jend( ( IsLower<MT1>::value )
1170  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1171  :( N ) );
1172  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1173 
      // Start from the current contents of y so the product is added to it.
1174  SIMDType xmm1( y.load(i ) );
1175  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1176  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1177  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1178  SIMDType xmm5( y.load(i+SIMDSIZE*4UL) );
1179  SIMDType xmm6( y.load(i+SIMDSIZE*5UL) );
1180  SIMDType xmm7( y.load(i+SIMDSIZE*6UL) );
1181  SIMDType xmm8( y.load(i+SIMDSIZE*7UL) );
1182 
1183  for( size_t j=jbegin; j<jend; ++j ) {
1184  const SIMDType x1( set( x[j] ) );
1185  xmm1 += A.load(i ,j) * x1;
1186  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1187  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1188  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1189  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1190  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1191  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1192  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1193  }
1194 
1195  y.store( i , xmm1 );
1196  y.store( i+SIMDSIZE , xmm2 );
1197  y.store( i+SIMDSIZE*2UL, xmm3 );
1198  y.store( i+SIMDSIZE*3UL, xmm4 );
1199  y.store( i+SIMDSIZE*4UL, xmm5 );
1200  y.store( i+SIMDSIZE*5UL, xmm6 );
1201  y.store( i+SIMDSIZE*6UL, xmm7 );
1202  y.store( i+SIMDSIZE*7UL, xmm8 );
1203  }
1204 
      // Strip of 4 SIMD packs per iteration; same column restriction scheme as above.
1205  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1206  {
1207  const size_t jbegin( ( IsUpper<MT1>::value )
1208  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1209  :( 0UL ) );
1210  const size_t jend( ( IsLower<MT1>::value )
1211  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1212  :( N ) );
1213  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1214 
1215  SIMDType xmm1( y.load(i ) );
1216  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1217  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1218  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1219 
1220  for( size_t j=jbegin; j<jend; ++j ) {
1221  const SIMDType x1( set( x[j] ) );
1222  xmm1 += A.load(i ,j) * x1;
1223  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1224  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1225  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1226  }
1227 
1228  y.store( i , xmm1 );
1229  y.store( i+SIMDSIZE , xmm2 );
1230  y.store( i+SIMDSIZE*2UL, xmm3 );
1231  y.store( i+SIMDSIZE*3UL, xmm4 );
1232  }
1233 
      // Strip of 3 SIMD packs per iteration.
1234  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1235  {
1236  const size_t jbegin( ( IsUpper<MT1>::value )
1237  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1238  :( 0UL ) );
1239  const size_t jend( ( IsLower<MT1>::value )
1240  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1241  :( N ) );
1242  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1243 
1244  SIMDType xmm1( y.load(i ) );
1245  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1246  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1247 
1248  for( size_t j=jbegin; j<jend; ++j ) {
1249  const SIMDType x1( set( x[j] ) );
1250  xmm1 += A.load(i ,j) * x1;
1251  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1252  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1253  }
1254 
1255  y.store( i , xmm1 );
1256  y.store( i+SIMDSIZE , xmm2 );
1257  y.store( i+SIMDSIZE*2UL, xmm3 );
1258  }
1259 
      // Strip of 2 SIMD packs per iteration.
1260  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1261  {
1262  const size_t jbegin( ( IsUpper<MT1>::value )
1263  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1264  :( 0UL ) );
1265  const size_t jend( ( IsLower<MT1>::value )
1266  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1267  :( N ) );
1268  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1269 
1270  SIMDType xmm1( y.load(i ) );
1271  SIMDType xmm2( y.load(i+SIMDSIZE) );
1272 
1273  for( size_t j=jbegin; j<jend; ++j ) {
1274  const SIMDType x1( set( x[j] ) );
1275  xmm1 += A.load(i ,j) * x1;
1276  xmm2 += A.load(i+SIMDSIZE,j) * x1;
1277  }
1278 
1279  y.store( i , xmm1 );
1280  y.store( i+SIMDSIZE, xmm2 );
1281  }
1282 
      // Single SIMD pack per iteration.
1283  for( ; i<ipos; i+=SIMDSIZE )
1284  {
1285  const size_t jbegin( ( IsUpper<MT1>::value )
1286  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1287  :( 0UL ) );
1288  const size_t jend( ( IsLower<MT1>::value )
1289  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1290  :( N ) );
1291  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1292 
1293  SIMDType xmm1( y.load(i) );
1294 
1295  for( size_t j=jbegin; j<jend; ++j ) {
1296  xmm1 += A.load(i,j) * set( x[j] );
1297  }
1298 
1299  y.store( i, xmm1 );
1300  }
1301 
      // Scalar tail: remaining rows up to M, one element at a time (only when `remainder` is set).
1302  for( ; remainder && i<M; ++i )
1303  {
1304  const size_t jbegin( ( IsUpper<MT1>::value )
1305  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1306  :( 0UL ) );
1307  const size_t jend( ( IsLower<MT1>::value )
1308  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1309  :( N ) );
1310  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1311 
      // Accumulate the row's dot product separately, then add it to y[i] once.
1312  ElementType value = ElementType();
1313 
1314  for( size_t j=jbegin; j<jend; ++j ) {
1315  value += A(i,j) * x[j];
1316  }
1317 
1318  y[i] += value;
1319  }
1320  }
1322  //**********************************************************************************************
1323 
1324  //**Default addition assignment to dense vectors (large matrices)*******************************
// Fallback large-matrix kernel for y += A * x: forwards unchanged to
// selectDefaultAddAssignKernel().
// NOTE(review): listing line 1341 (presumably the return type / SFINAE condition selecting
// this overload over the vectorized one) is absent from this excerpt.
1338  template< typename VT1 // Type of the left-hand side target vector
1339  , typename MT1 // Type of the left-hand side matrix operand
1340  , typename VT2 > // Type of the right-hand side vector operand
1342  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1343  {
1344  selectDefaultAddAssignKernel( y, A, x );
1345  }
1347  //**********************************************************************************************
1348 
1349  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized, blocked kernel computing y += A * x for large matrices.
// The iteration space is tiled into row blocks of `iblock` rows and column blocks of `jblock`
// columns. Within a tile, strips of 8/4/3/2/1 SIMD packs accumulate A.load(i,j) * x[j] over the
// tile's columns and the partial sums are then added to y; a scalar loop covers leftover rows.
// Parameters: y - target vector (updated in place), A - matrix operand, x - vector operand.
// NOTE(review): listing line 1366 (presumably the return type / enable-if condition) is
// absent from this excerpt.
// NOTE(review): the xmm accumulators are default-constructed and then updated with +=, which
// relies on SIMDType's default constructor producing a zero pack - confirm.
1363  template< typename VT1 // Type of the left-hand side target vector
1364  , typename MT1 // Type of the left-hand side matrix operand
1365  , typename VT2 > // Type of the right-hand side vector operand
1367  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1368  {
      // A scalar tail loop is needed unless both the matrix and the target vector are padded.
1369  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1370 
1371  const size_t M( A.rows() );
1372  const size_t N( A.columns() );
1373 
      // Row block size: the number of ElementType values that fit into 32768 bytes.
1374  const size_t iblock( 32768UL / sizeof( ElementType ) );
      // Column block size: 8 columns if N < iblock, otherwise 4.
1375  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1376 
1377  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
1378 
1379  for( size_t ii=0U; ii<M; ii+=iblock ) {
1380  for( size_t jj=0UL; jj<N; jj+=jblock )
1381  {
1382  const size_t jend( min( jj+jblock, N ) );
1383  const size_t itmp( min( ii+iblock, M ) );
      // Upper matrices: rows are limited to i < jend (i < jend-1 if strictly upper).
1384  const size_t iend( ( IsUpper<MT1>::value )
1385  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1386  :( itmp ) );
1387 
      // End (exclusive) of the rows covered by full SIMD packs within this tile.
1388  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1389  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1390 
      // Lower matrices: start at row jj (jj+1 if strictly lower) rounded down to a multiple
      // of SIMDSIZE, but never before the beginning of the row block.
1391  size_t i( ( IsLower<MT1>::value )
1392  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
1393  :( ii ) );
1394 
      // Strip of 8 SIMD packs per iteration.
1395  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1396  {
1397  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1398 
1399  for( size_t j=jj; j<jend; ++j ) {
1400  const SIMDType x1( set( x[j] ) );
1401  xmm1 += A.load(i ,j) * x1;
1402  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1403  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1404  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1405  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1406  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1407  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1408  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1409  }
1410 
      // Add the tile's partial sums to the values already stored in y.
1411  y.store( i , y.load(i ) + xmm1 );
1412  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1413  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1414  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
1415  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5 );
1416  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6 );
1417  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7 );
1418  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8 );
1419  }
1420 
      // Strip of 4 SIMD packs per iteration.
1421  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1422  {
1423  SIMDType xmm1, xmm2, xmm3, xmm4;
1424 
1425  for( size_t j=jj; j<jend; ++j ) {
1426  const SIMDType x1( set( x[j] ) );
1427  xmm1 += A.load(i ,j) * x1;
1428  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1429  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1430  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1431  }
1432 
1433  y.store( i , y.load(i ) + xmm1 );
1434  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1435  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1436  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4 );
1437  }
1438 
      // Strip of 3 SIMD packs per iteration.
1439  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1440  {
1441  SIMDType xmm1, xmm2, xmm3;
1442 
1443  for( size_t j=jj; j<jend; ++j ) {
1444  const SIMDType x1( set( x[j] ) );
1445  xmm1 += A.load(i ,j) * x1;
1446  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1447  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1448  }
1449 
1450  y.store( i , y.load(i ) + xmm1 );
1451  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2 );
1452  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3 );
1453  }
1454 
      // Strip of 2 SIMD packs per iteration.
1455  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1456  {
1457  SIMDType xmm1, xmm2;
1458 
1459  for( size_t j=jj; j<jend; ++j ) {
1460  const SIMDType x1( set( x[j] ) );
1461  xmm1 += A.load(i ,j) * x1;
1462  xmm2 += A.load(i+SIMDSIZE,j) * x1;
1463  }
1464 
1465  y.store( i , y.load(i ) + xmm1 );
1466  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2 );
1467  }
1468 
      // Single SIMD pack per iteration.
1469  for( ; i<ipos; i+=SIMDSIZE )
1470  {
1471  SIMDType xmm1;
1472 
1473  for( size_t j=jj; j<jend; ++j ) {
1474  xmm1 += A.load(i,j) * set( x[j] );
1475  }
1476 
1477  y.store( i, y.load(i) + xmm1 );
1478  }
1479 
      // Scalar tail: remaining rows of the tile up to iend (only when `remainder` is set).
1480  for( ; remainder && i<iend; ++i )
1481  {
1482  ElementType value = ElementType();
1483 
1484  for( size_t j=jj; j<jend; ++j ) {
1485  value += A(i,j) * x[j];
1486  }
1487 
1488  y[i] += value;
1489  }
1490  }
1491  }
1492  }
1494  //**********************************************************************************************
1495 
1496  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback for the BLAS-based y += A * x kernel: forwards unchanged to
// selectLargeAddAssignKernel().
// NOTE(review): listing line 1513 (presumably the return type / SFINAE condition selecting
// this overload when the BLAS kernel is not applicable) is absent from this excerpt.
1510  template< typename VT1 // Type of the left-hand side target vector
1511  , typename MT1 // Type of the left-hand side matrix operand
1512  , typename VT2 > // Type of the right-hand side vector operand
1514  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1515  {
1516  selectLargeAddAssignKernel( y, A, x );
1517  }
1519  //**********************************************************************************************
1520 
1521  //**BLAS-based addition assignment to dense vectors*********************************************
1522 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
1523 
// BLAS-backed kernel for y += A * x (this overload sits inside the
// BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION preprocessor guard).
// Triangular matrices: x is copied into a temporary, trmv() is applied to that temporary with
// the matching CblasLower/CblasUpper flag, and the temporary is added to y.
// All other matrices are handled by a single gemv() call.
// NOTE(review): listing line 1539 (presumably the return type / enable-if condition) is
// absent from this excerpt.
1536  template< typename VT1 // Type of the left-hand side target vector
1537  , typename MT1 // Type of the left-hand side matrix operand
1538  , typename VT2 > // Type of the right-hand side vector operand
1540  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1541  {
1542  using ET = ElementType_<VT1>;
1543 
1544  if( IsTriangular<MT1>::value ) {
      // trmv() works on its first argument, so x is copied first and y is updated afterwards.
1545  ResultType_<VT1> tmp( serial( x ) );
1546  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1547  addAssign( y, tmp );
1548  }
1549  else {
      // NOTE(review): the trailing ET(1), ET(1) look like the alpha/beta scaling factors
      // (i.e. y = 1*A*x + 1*y) - confirm against the gemv() wrapper.
1550  gemv( y, A, x, ET(1), ET(1) );
1551  }
1552  }
1554 #endif
1555  //**********************************************************************************************
1556 
1557  //**Addition assignment to sparse vectors*******************************************************
1558  // No special implementation for the addition assignment to sparse vectors.
1559  //**********************************************************************************************
1560 
1561  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the matrix/vector product to a dense vector (lhs -= rhs).
// Returns immediately if the matrix has no rows or no columns; otherwise both operands are
// evaluated via serial() into LT/RT objects and the work is handed to selectSubAssignKernel().
// lhs - target dense vector, rhs - the multiplication expression to be subtracted.
// LT and RT are declared elsewhere in the class (outside this excerpt).
// NOTE(review): listing line 1577 is absent from this excerpt.
1574  template< typename VT1 > // Type of the target dense vector
1575  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1576  {
1578 
1579  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1580 
      // An empty product contributes nothing, so lhs is left untouched.
1581  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1582  return;
1583  }
1584 
1585  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1586  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1587 
1588  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1589  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1590  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1591  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1592 
1593  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1594  }
1596  //**********************************************************************************************
1597 
1598  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Chooses the kernel used for y -= A * x.
// The small-matrix kernel is used if MT1 is diagonal, if the original matrix operand MT is a
// computation that is not evaluated up front (!evaluateMatrix), or if the number of matrix
// elements is below TDMATDVECMULT_THRESHOLD; in every other case the BLAS kernel is selected.
// Parameters: y - target vector, A - matrix operand, x - vector operand.
1609  template< typename VT1 // Type of the left-hand side target vector
1610  , typename MT1 // Type of the left-hand side matrix operand
1611  , typename VT2 > // Type of the right-hand side vector operand
1612  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1613  {
1614  if( ( IsDiagonal<MT1>::value ) ||
1615  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1616  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1617  selectSmallSubAssignKernel( y, A, x );
1618  else
1619  selectBlasSubAssignKernel( y, A, x );
1620  }
1622  //**********************************************************************************************
1623 
1624  //**Default subtraction assignment to dense vectors*********************************************
// Scalar (non-vectorized) kernel computing y -= A * x column by column.
// For each column j the products A(i,j) * x[j] are subtracted from y[i] over the row range
// selected by the matrix structure, two rows per loop iteration.
// Parameters: y - target vector (updated in place), A - matrix operand, x - vector operand.
1638  template< typename VT1 // Type of the left-hand side target vector
1639  , typename MT1 // Type of the left-hand side matrix operand
1640  , typename VT2 > // Type of the right-hand side vector operand
1641  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1642  {
1643  const size_t M( A.rows() );
1644  const size_t N( A.columns() );
1645 
1646  for( size_t j=0UL; j<N; ++j )
1647  {
      // NOTE(review): listing line 1648 is absent from this excerpt; the `else` below implies
      // that a condition guarded this branch - presumably a diagonal-matrix check, since only
      // the diagonal element A(j,j) is used here. Confirm against the original header.
1649  {
1650  y[j] -= A(j,j) * x[j];
1651  }
1652  else
1653  {
      // Row range for column j: lower matrices start at row j (j+1 if strictly lower);
      // upper matrices end after row j (before row j if strictly upper).
1654  const size_t ibegin( ( IsLower<MT1>::value )
1655  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1656  :( 0UL ) );
1657  const size_t iend( ( IsUpper<MT1>::value )
1658  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1659  :( M ) );
1660  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1661 
      // ipos marks the end of the part of [ibegin, iend) that can be processed in pairs.
1662  const size_t inum( iend - ibegin );
1663  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1664 
1665  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1666  y[i ] -= A(i ,j) * x[j];
1667  y[i+1UL] -= A(i+1UL,j) * x[j];
1668  }
      // Odd row count: handle the single leftover row.
1669  if( ipos < iend ) {
1670  y[ipos] -= A(ipos,j) * x[j];
1671  }
1672  }
1673  }
1674  }
1676  //**********************************************************************************************
1677 
1678  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Fallback small-matrix kernel for y -= A * x: forwards unchanged to
// selectDefaultSubAssignKernel().
// NOTE(review): listing line 1695 (presumably the return type / SFINAE condition selecting
// this overload over the vectorized one) is absent from this excerpt.
1692  template< typename VT1 // Type of the left-hand side target vector
1693  , typename MT1 // Type of the left-hand side matrix operand
1694  , typename VT2 > // Type of the right-hand side vector operand
1696  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1697  {
1698  selectDefaultSubAssignKernel( y, A, x );
1699  }
1701  //**********************************************************************************************
1702 
1703  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized kernel computing y -= A * x for small matrices.
// Rows of y are processed in strips of 8, 4, 3, 2 and finally 1 SIMD pack(s): each strip loads
// the current y values, subtracts A.load(i,j) * x[j] (x[j] broadcast via set()) over the
// selected columns and stores the result back. A scalar loop handles the rows left over when
// the operands are not padded.
// Parameters: y - target vector (updated in place), A - matrix operand, x - vector operand.
// NOTE(review): listing line 1721 (presumably the return type / enable-if condition) is
// absent from this excerpt.
1718  template< typename VT1 // Type of the left-hand side target vector
1719  , typename MT1 // Type of the left-hand side matrix operand
1720  , typename VT2 > // Type of the right-hand side vector operand
1722  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1723  {
      // A scalar tail loop is needed unless both the matrix and the target vector are padded.
1724  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1725 
1726  const size_t M( A.rows() );
1727  const size_t N( A.columns() );
1728 
      // End (exclusive) of the rows covered by full SIMD packs: M rounded down to a multiple
      // of SIMDSIZE when a remainder loop is required, M itself otherwise.
1729  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
1730  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1731 
1732  size_t i( 0UL );
1733 
      // Strip of 8 SIMD packs per iteration.
1734  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1735  {
      // Column range restricted by the matrix structure: upper matrices start at column i
      // (i+1 if strictly upper); lower matrices stop at the end of the strip, clamped to N
      // (one column earlier if strictly lower).
1736  const size_t jbegin( ( IsUpper<MT1>::value )
1737  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1738  :( 0UL ) );
1739  const size_t jend( ( IsLower<MT1>::value )
1740  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1741  :( N ) );
1742  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1743 
      // Start from the current contents of y so the product is subtracted from it.
1744  SIMDType xmm1( y.load(i ) );
1745  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1746  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1747  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1748  SIMDType xmm5( y.load(i+SIMDSIZE*4UL) );
1749  SIMDType xmm6( y.load(i+SIMDSIZE*5UL) );
1750  SIMDType xmm7( y.load(i+SIMDSIZE*6UL) );
1751  SIMDType xmm8( y.load(i+SIMDSIZE*7UL) );
1752 
1753  for( size_t j=jbegin; j<jend; ++j ) {
1754  const SIMDType x1( set( x[j] ) );
1755  xmm1 -= A.load(i ,j) * x1;
1756  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1757  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1758  xmm4 -= A.load(i+SIMDSIZE*3UL,j) * x1;
1759  xmm5 -= A.load(i+SIMDSIZE*4UL,j) * x1;
1760  xmm6 -= A.load(i+SIMDSIZE*5UL,j) * x1;
1761  xmm7 -= A.load(i+SIMDSIZE*6UL,j) * x1;
1762  xmm8 -= A.load(i+SIMDSIZE*7UL,j) * x1;
1763  }
1764 
1765  y.store( i , xmm1 );
1766  y.store( i+SIMDSIZE , xmm2 );
1767  y.store( i+SIMDSIZE*2UL, xmm3 );
1768  y.store( i+SIMDSIZE*3UL, xmm4 );
1769  y.store( i+SIMDSIZE*4UL, xmm5 );
1770  y.store( i+SIMDSIZE*5UL, xmm6 );
1771  y.store( i+SIMDSIZE*6UL, xmm7 );
1772  y.store( i+SIMDSIZE*7UL, xmm8 );
1773  }
1774 
      // Strip of 4 SIMD packs per iteration; same column restriction scheme as above.
1775  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1776  {
1777  const size_t jbegin( ( IsUpper<MT1>::value )
1778  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1779  :( 0UL ) );
1780  const size_t jend( ( IsLower<MT1>::value )
1781  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1782  :( N ) );
1783  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1784 
1785  SIMDType xmm1( y.load(i ) );
1786  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1787  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1788  SIMDType xmm4( y.load(i+SIMDSIZE*3UL) );
1789 
1790  for( size_t j=jbegin; j<jend; ++j ) {
1791  const SIMDType x1( set( x[j] ) );
1792  xmm1 -= A.load(i ,j) * x1;
1793  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1794  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1795  xmm4 -= A.load(i+SIMDSIZE*3UL,j) * x1;
1796  }
1797 
1798  y.store( i , xmm1 );
1799  y.store( i+SIMDSIZE , xmm2 );
1800  y.store( i+SIMDSIZE*2UL, xmm3 );
1801  y.store( i+SIMDSIZE*3UL, xmm4 );
1802  }
1803 
      // Strip of 3 SIMD packs per iteration.
1804  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1805  {
1806  const size_t jbegin( ( IsUpper<MT1>::value )
1807  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1808  :( 0UL ) );
1809  const size_t jend( ( IsLower<MT1>::value )
1810  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1811  :( N ) );
1812  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1813 
1814  SIMDType xmm1( y.load(i ) );
1815  SIMDType xmm2( y.load(i+SIMDSIZE ) );
1816  SIMDType xmm3( y.load(i+SIMDSIZE*2UL) );
1817 
1818  for( size_t j=jbegin; j<jend; ++j ) {
1819  const SIMDType x1( set( x[j] ) );
1820  xmm1 -= A.load(i ,j) * x1;
1821  xmm2 -= A.load(i+SIMDSIZE ,j) * x1;
1822  xmm3 -= A.load(i+SIMDSIZE*2UL,j) * x1;
1823  }
1824 
1825  y.store( i , xmm1 );
1826  y.store( i+SIMDSIZE , xmm2 );
1827  y.store( i+SIMDSIZE*2UL, xmm3 );
1828  }
1829 
      // Strip of 2 SIMD packs per iteration.
1830  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1831  {
1832  const size_t jbegin( ( IsUpper<MT1>::value )
1833  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1834  :( 0UL ) );
1835  const size_t jend( ( IsLower<MT1>::value )
1836  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1837  :( N ) );
1838  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1839 
1840  SIMDType xmm1( y.load(i ) );
1841  SIMDType xmm2( y.load(i+SIMDSIZE) );
1842 
1843  for( size_t j=jbegin; j<jend; ++j ) {
1844  const SIMDType x1( set( x[j] ) );
1845  xmm1 -= A.load(i ,j) * x1;
1846  xmm2 -= A.load(i+SIMDSIZE,j) * x1;
1847  }
1848 
1849  y.store( i , xmm1 );
1850  y.store( i+SIMDSIZE, xmm2 );
1851  }
1852 
      // Single SIMD pack per iteration.
1853  for( ; i<ipos; i+=SIMDSIZE )
1854  {
1855  const size_t jbegin( ( IsUpper<MT1>::value )
1856  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1857  :( 0UL ) );
1858  const size_t jend( ( IsLower<MT1>::value )
1859  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1860  :( N ) );
1861  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1862 
1863  SIMDType xmm1( y.load(i) );
1864 
1865  for( size_t j=jbegin; j<jend; ++j ) {
1866  xmm1 -= A.load(i,j) * set( x[j] );
1867  }
1868 
1869  y.store( i, xmm1 );
1870  }
1871 
      // Scalar tail: remaining rows up to M, one element at a time (only when `remainder` is set).
1872  for( ; remainder && i<M; ++i )
1873  {
1874  const size_t jbegin( ( IsUpper<MT1>::value )
1875  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1876  :( 0UL ) );
1877  const size_t jend( ( IsLower<MT1>::value )
1878  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1879  :( N ) );
1880  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1881 
      // Accumulate the row's dot product positively, then subtract it from y[i] once.
1882  ElementType value = ElementType();
1883 
1884  for( size_t j=jbegin; j<jend; ++j ) {
1885  value += A(i,j) * x[j];
1886  }
1887 
1888  y[i] -= value;
1889  }
1890  }
1892  //**********************************************************************************************
1893 
1894  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Fallback large-matrix kernel for y -= A * x: forwards unchanged to
// selectDefaultSubAssignKernel().
// NOTE(review): listing line 1911 (presumably the return type / SFINAE condition selecting
// this overload over the vectorized one) is absent from this excerpt.
1908  template< typename VT1 // Type of the left-hand side target vector
1909  , typename MT1 // Type of the left-hand side matrix operand
1910  , typename VT2 > // Type of the right-hand side vector operand
1912  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1913  {
1914  selectDefaultSubAssignKernel( y, A, x );
1915  }
1917  //**********************************************************************************************
1918 
1919  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized, blocked kernel computing y -= A * x for large matrices.
// The iteration space is tiled into row blocks of `iblock` rows and column blocks of `jblock`
// columns. Within a tile, strips of 8/4/3/2/1 SIMD packs accumulate A.load(i,j) * x[j] over the
// tile's columns and the partial sums are then subtracted from y; a scalar loop covers
// leftover rows.
// Parameters: y - target vector (updated in place), A - matrix operand, x - vector operand.
// NOTE(review): listing line 1937 (presumably the return type / enable-if condition) is
// absent from this excerpt.
// NOTE(review): the xmm accumulators are default-constructed and then updated with +=, which
// relies on SIMDType's default constructor producing a zero pack - confirm.
1934  template< typename VT1 // Type of the left-hand side target vector
1935  , typename MT1 // Type of the left-hand side matrix operand
1936  , typename VT2 > // Type of the right-hand side vector operand
1938  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1939  {
      // A scalar tail loop is needed unless both the matrix and the target vector are padded.
1940  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1941 
1942  const size_t M( A.rows() );
1943  const size_t N( A.columns() );
1944 
      // Row block size: the number of ElementType values that fit into 32768 bytes.
1945  const size_t iblock( 32768UL / sizeof( ElementType ) );
      // Column block size: 8 columns if N < iblock, otherwise 4.
1946  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1947 
1948  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
1949 
1950  for( size_t ii=0U; ii<M; ii+=iblock ) {
1951  for( size_t jj=0UL; jj<N; jj+=jblock )
1952  {
1953  const size_t jend( min( jj+jblock, N ) );
1954  const size_t itmp( min( ii+iblock, M ) );
      // Upper matrices: rows are limited to i < jend (i < jend-1 if strictly upper).
1955  const size_t iend( ( IsUpper<MT1>::value )
1956  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1957  :( itmp ) );
1958 
      // End (exclusive) of the rows covered by full SIMD packs within this tile.
1959  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1960  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1961 
      // Lower matrices: start at row jj (jj+1 if strictly lower) rounded down to a multiple
      // of SIMDSIZE, but never before the beginning of the row block.
1962  size_t i( ( IsLower<MT1>::value )
1963  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
1964  :( ii ) );
1965 
      // Strip of 8 SIMD packs per iteration.
1966  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
1967  {
1968  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1969 
1970  for( size_t j=jj; j<jend; ++j ) {
1971  const SIMDType x1( set( x[j] ) );
1972  xmm1 += A.load(i ,j) * x1;
1973  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
1974  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
1975  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
1976  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
1977  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
1978  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
1979  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
1980  }
1981 
      // Subtract the tile's partial sums from the values already stored in y.
1982  y.store( i , y.load(i ) - xmm1 );
1983  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
1984  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
1985  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4 );
1986  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5 );
1987  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6 );
1988  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7 );
1989  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8 );
1990  }
1991 
      // Strip of 4 SIMD packs per iteration.
1992  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1993  {
1994  SIMDType xmm1, xmm2, xmm3, xmm4;
1995 
1996  for( size_t j=jj; j<jend; ++j ) {
1997  const SIMDType x1( set( x[j] ) );
1998  xmm1 += A.load(i ,j) * x1;
1999  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2000  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2001  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2002  }
2003 
2004  y.store( i , y.load(i ) - xmm1 );
2005  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
2006  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
2007  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4 );
2008  }
2009 
      // Strip of 3 SIMD packs per iteration.
2010  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
2011  {
2012  SIMDType xmm1, xmm2, xmm3;
2013 
2014  for( size_t j=jj; j<jend; ++j ) {
2015  const SIMDType x1( set( x[j] ) );
2016  xmm1 += A.load(i ,j) * x1;
2017  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2018  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2019  }
2020 
2021  y.store( i , y.load(i ) - xmm1 );
2022  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2 );
2023  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3 );
2024  }
2025 
      // Strip of 2 SIMD packs per iteration.
2026  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
2027  {
2028  SIMDType xmm1, xmm2;
2029 
2030  for( size_t j=jj; j<jend; ++j ) {
2031  const SIMDType x1( set( x[j] ) );
2032  xmm1 += A.load(i ,j) * x1;
2033  xmm2 += A.load(i+SIMDSIZE,j) * x1;
2034  }
2035 
2036  y.store( i , y.load(i ) - xmm1 );
2037  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2 );
2038  }
2039 
      // Single SIMD pack per iteration.
2040  for( ; i<ipos; i+=SIMDSIZE )
2041  {
2042  SIMDType xmm1;
2043 
2044  for( size_t j=jj; j<jend; ++j ) {
2045  xmm1 += A.load(i,j) * set( x[j] );
2046  }
2047 
2048  y.store( i, y.load(i) - xmm1 );
2049  }
2050 
      // Scalar tail: remaining rows of the tile up to iend (only when `remainder` is set).
2051  for( ; remainder && i<iend; ++i )
2052  {
2053  ElementType value = ElementType();
2054 
2055  for( size_t j=jj; j<jend; ++j ) {
2056  value += A(i,j) * x[j];
2057  }
2058 
2059  y[i] -= value;
2060  }
2061  }
2062  }
2063  }
2065  //**********************************************************************************************
2066 
2067  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback for the BLAS-based y -= A * x kernel: forwards unchanged to
// selectLargeSubAssignKernel().
// NOTE(review): listing line 2084 (presumably the return type / SFINAE condition selecting
// this overload when the BLAS kernel is not applicable) is absent from this excerpt.
2081  template< typename VT1 // Type of the left-hand side target vector
2082  , typename MT1 // Type of the left-hand side matrix operand
2083  , typename VT2 > // Type of the right-hand side vector operand
2085  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2086  {
2087  selectLargeSubAssignKernel( y, A, x );
2088  }
2090  //**********************************************************************************************
2091 
2092  //**BLAS-based subtraction assignment to dense vectors******************************************
2093 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
2094 
// BLAS-backed kernel for y -= A * x (this overload sits inside the
// BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION preprocessor guard).
// Triangular matrices: x is copied into a temporary, trmv() is applied to that temporary with
// the matching CblasLower/CblasUpper flag, and the temporary is subtracted from y.
// All other matrices are handled by a single gemv() call.
// NOTE(review): listing line 2110 (presumably the return type / enable-if condition) is
// absent from this excerpt.
2107  template< typename VT1 // Type of the left-hand side target vector
2108  , typename MT1 // Type of the left-hand side matrix operand
2109  , typename VT2 > // Type of the right-hand side vector operand
2111  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2112  {
2113  using ET = ElementType_<VT1>;
2114 
2115  if( IsTriangular<MT1>::value ) {
      // trmv() works on its first argument, so x is copied first and y is updated afterwards.
2116  ResultType_<VT1> tmp( serial( x ) );
2117  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2118  subAssign( y, tmp );
2119  }
2120  else {
      // NOTE(review): the trailing ET(-1), ET(1) look like the alpha/beta scaling factors
      // (i.e. y = -1*A*x + 1*y) - confirm against the gemv() wrapper.
2121  gemv( y, A, x, ET(-1), ET(1) );
2122  }
2123  }
2125 #endif
2126  //**********************************************************************************************
2127 
2128  //**Subtraction assignment to sparse vectors****************************************************
2129  // No special implementation for the subtraction assignment to sparse vectors.
2130  //**********************************************************************************************
2131 
2132  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the matrix/vector product to a dense vector.
// The expression is first evaluated via serial() into a temporary of type ResultType, which is
// then passed on to multAssign() for the target vector.
// lhs - target dense vector, rhs - the multiplication expression.
// NOTE(review): listing lines 2148 and 2150-2152 are absent from this excerpt.
2145  template< typename VT1 > // Type of the target dense vector
2146  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2147  {
2149 
2153 
2154  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2155 
2156  const ResultType tmp( serial( rhs ) );
2157  multAssign( ~lhs, tmp );
2158  }
2160  //**********************************************************************************************
2161 
2162  //**Multiplication assignment to sparse vectors*************************************************
2163  // No special implementation for the multiplication assignment to sparse vectors.
2164  //**********************************************************************************************
2165 
2166  //**Division assignment to dense vectors********************************************************
// Division assignment of the matrix/vector product to a dense vector.
// The expression is first evaluated via serial() into a temporary of type ResultType, which is
// then passed on to divAssign() for the target vector.
// lhs - target dense vector, rhs - the multiplication expression.
// NOTE(review): listing lines 2182 and 2184-2186 are absent from this excerpt.
2179  template< typename VT1 > // Type of the target dense vector
2180  friend inline void divAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2181  {
2183 
2187 
2188  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2189 
2190  const ResultType tmp( serial( rhs ) );
2191  divAssign( ~lhs, tmp );
2192  }
2194  //**********************************************************************************************
2195 
2196  //**Division assignment to sparse vectors*******************************************************
2197  // No special implementation for the division assignment to sparse vectors.
2198  //**********************************************************************************************
2199 
2200  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of the matrix/vector product to a dense vector; only enabled when
// UseSMPAssign<VT1> holds.
// A matrix without rows leaves the target untouched, a matrix without columns resets the
// target; otherwise both operands are evaluated into LT/RT objects and the product of the
// evaluated operands is forwarded to smpAssign().
// NOTE(review): listing line 2217, which should carry the function name and the parameter list
// (the body uses `lhs` and `rhs`), and line 2219 are absent from this excerpt.
2215  template< typename VT1 > // Type of the target dense vector
2216  friend inline EnableIf_< UseSMPAssign<VT1> >
2218  {
2220 
2221  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2222 
2223  if( rhs.mat_.rows() == 0UL ) {
2224  return;
2225  }
2226  else if( rhs.mat_.columns() == 0UL ) {
      // No columns: there is nothing to multiply, so the target is reset instead.
2227  reset( ~lhs );
2228  return;
2229  }
2230 
2231  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2232  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2233 
2234  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2235  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2236  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2237  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2238 
2239  smpAssign( ~lhs, A * x );
2240  }
2242  //**********************************************************************************************
2243 
2244  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the matrix/vector product to a sparse vector; only enabled when
// UseSMPAssign<VT1> holds.
// The expression is evaluated into a temporary of type ResultType, which is then forwarded to
// smpAssign() for the target vector.
// NOTE(review): listing line 2261, which should carry the function name and the parameter list
// (the body uses `lhs` and `rhs`), and lines 2263 and 2265-2267 are absent from this excerpt.
2259  template< typename VT1 > // Type of the target sparse vector
2260  friend inline EnableIf_< UseSMPAssign<VT1> >
2262  {
2264 
2268 
2269  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2270 
2271  const ResultType tmp( rhs );
2272  smpAssign( ~lhs, tmp );
2273  }
2275  //**********************************************************************************************
2276 
2277  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the matrix/vector product to a dense vector; only enabled when
// UseSMPAssign<VT1> holds.
// Returns immediately if the matrix has no rows or no columns; otherwise both operands are
// evaluated into LT/RT objects and the product of the evaluated operands is forwarded to
// smpAddAssign().
// NOTE(review): listing line 2294, which should carry the function name and the parameter list
// (the body uses `lhs` and `rhs`), and line 2296 are absent from this excerpt.
2292  template< typename VT1 > // Type of the target dense vector
2293  friend inline EnableIf_< UseSMPAssign<VT1> >
2295  {
2297 
2298  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2299 
      // An empty product contributes nothing, so lhs is left untouched.
2300  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2301  return;
2302  }
2303 
2304  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2305  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2306 
2307  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2308  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2309  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2310  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2311 
2312  smpAddAssign( ~lhs, A * x );
2313  }
2315  //**********************************************************************************************
2316 
2317  //**SMP addition assignment to sparse vectors***************************************************
2318  // No special implementation for the SMP addition assignment to sparse vectors.
2319  //**********************************************************************************************
2320 
2321  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the matrix/vector product to a dense target vector.
// Enabled only when UseSMPAssign<VT1> holds. Returns without touching the target if the
// matrix operand has zero rows or zero columns. Otherwise both operands are evaluated into
// local objects of type LT and RT and smpSubAssign() is called with the product A * x.
// NOTE(review): LT/RT are declared outside this view, and listing lines 2338 and 2340
// (presumably the function name/parameter list and a trace macro) are missing here.
2336  template< typename VT1 > // Type of the target dense vector
2337  friend inline EnableIf_< UseSMPAssign<VT1> >
2339  {
2341 
2342  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2343 
// Nothing is subtracted when the matrix operand is empty in either dimension.
2344  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2345  return;
2346  }
2347 
2348  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2349  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2350 
2351  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2352  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2353  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2354  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2355 
2356  smpSubAssign( ~lhs, A * x );
2357  }
2359  //**********************************************************************************************
2360 
2361  //**SMP subtraction assignment to sparse vectors************************************************
2362  // No special implementation for the SMP subtraction assignment to sparse vectors.
2363  //**********************************************************************************************
2364 
2365  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the matrix/vector product to a dense target vector.
// Enabled only when UseSMPAssign<VT1> holds. The expression rhs is evaluated into a temporary
// of type ResultType, which is then passed to smpMultAssign() together with the target (~lhs).
// NOTE(review): listing lines 2382, 2384 and 2386-2388 are absent from this view (presumably
// the function name/parameter list and tracing/constraint macros) - confirm against the header.
2380  template< typename VT1 > // Type of the target dense vector
2381  friend inline EnableIf_< UseSMPAssign<VT1> >
2383  {
2385 
2389 
2390  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2391 
// Evaluate the whole expression once; only the evaluated temporary is used afterwards.
2392  const ResultType tmp( rhs );
2393  smpMultAssign( ~lhs, tmp );
2394  }
2396  //**********************************************************************************************
2397 
2398  //**SMP multiplication assignment to sparse vectors*********************************************
2399  // No special implementation for the SMP multiplication assignment to sparse vectors.
2400  //**********************************************************************************************
2401 
2402  //**SMP division assignment to dense vectors****************************************************
// SMP division assignment of the matrix/vector product to a dense target vector.
// Enabled only when UseSMPAssign<VT1> holds. The expression rhs is evaluated into a temporary
// of type ResultType, which is then passed to smpDivAssign() together with the target (~lhs).
// NOTE(review): listing lines 2419, 2421 and 2423-2425 are absent from this view (presumably
// the function name/parameter list and tracing/constraint macros) - confirm against the header.
2417  template< typename VT1 > // Type of the target dense vector
2418  friend inline EnableIf_< UseSMPAssign<VT1> >
2420  {
2422 
2426 
2427  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2428 
// Evaluate the whole expression once; only the evaluated temporary is used afterwards.
2429  const ResultType tmp( rhs );
2430  smpDivAssign( ~lhs, tmp );
2431  }
2433  //**********************************************************************************************
2434 
2435  //**SMP division assignment to sparse vectors***************************************************
2436  // No special implementation for the SMP division assignment to sparse vectors.
2437  //**********************************************************************************************
2438 
2439  //**Compile time checks*************************************************************************
2447  //**********************************************************************************************
2448 };
2449 //*************************************************************************************************
2450 
2451 
2452 
2453 
2454 //=================================================================================================
2455 //
2456 // DVECSCALARMULTEXPR SPECIALIZATION
2457 //
2458 //=================================================================================================
2459 
2460 //*************************************************************************************************
2469 template< typename MT // Type of the left-hand side dense matrix
2470  , typename VT // Type of the right-hand side dense vector
2471  , typename ST > // Type of the side scalar value
2472 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
2473  : public VecScalarMultExpr< DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false > >
2474  , private Computation
2475 {
2476  private:
2477  //**Type definitions****************************************************************************
// Private shorthand aliases for the wrapped expression and its operand-related types.
2478  using MVM = TDMatDVecMultExpr<MT,VT>; // Type of the wrapped matrix/vector multiplication expression
2479  using RES = ResultType_<MVM>; // Result type of the multiplication expression MVM
2480  using MRT = ResultType_<MT>; // Result type of the matrix operand type MT
2481  using VRT = ResultType_<VT>; // Result type of the vector operand type VT
2482  using MET = ElementType_<MRT>; // Element type of MRT
2483  using VET = ElementType_<VRT>; // Element type of VRT
2484  using MCT = CompositeType_<MT>; // Composite type of MT
2485  using VCT = CompositeType_<VT>; // Composite type of VT
2486  //**********************************************************************************************
2487 
2488  //**********************************************************************************************
// Compile-time flag evaluateMatrix. The visible part of its condition requires MT to be a
// computation expression and the element types MET and VET to be the same type.
// NOTE(review): the condition continues on listing line 2491, which is missing from this view;
// this comment therefore does not describe the complete condition.
2490  enum : bool { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2492  //**********************************************************************************************
2493 
2494  //**********************************************************************************************
// Compile-time flag evaluateVector: true if VT is a computation expression or if
// RequiresEvaluation<VT> reports that the vector operand requires an evaluation.
2496  enum : bool { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2497  //**********************************************************************************************
2498 
2499  //**********************************************************************************************
2501 
// Helper trait used in the EnableIf_ conditions of the SMP assignment functions.
// Its value is true whenever the matrix or the vector operand is flagged for evaluation
// (evaluateMatrix || evaluateVector). The template parameter T1 is not used in the
// visible definition.
2504  template< typename T1 >
2505  struct UseSMPAssign {
2506  enum : bool { value = ( evaluateMatrix || evaluateVector ) };
2507  };
2508  //**********************************************************************************************
2509 
2510  //**********************************************************************************************
2512 
// Helper trait UseBlasKernel over four types T1..T4.
// Visible conditions: T1, T2 and T3 must all be SIMD enabled, and the element types of T1
// and T3 must be identical.
// NOTE(review): listing lines 2516-2520, 2522-2525 and 2527 are missing from this view, so
// the 'value' declaration and several further conditions cannot be seen here; judging by the
// name this selects the BLAS kernels - confirm against the original header.
2514  template< typename T1, typename T2, typename T3, typename T4 >
2515  struct UseBlasKernel {
2521  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
2526  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
2528  };
2529  //**********************************************************************************************
2530 
2531  //**********************************************************************************************
2533 
// Helper trait UseVectorizedDefaultKernel over four types T1..T4.
// Visible conditions: useOptimizedKernels must be set, T1, T2 and T3 must all be SIMD
// enabled, and HasSIMDAdd and HasSIMDMult must hold for the element types of T2 and T3.
// NOTE(review): listing lines 2539 and 2541-2543 are missing from this view; the fragment
// ', T4 >::value' on line 2544 belongs to a condition whose beginning cannot be seen here.
2536  template< typename T1, typename T2, typename T3, typename T4 >
2537  struct UseVectorizedDefaultKernel {
2538  enum : bool { value = useOptimizedKernels &&
2540  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
2544  , T4 >::value &&
2545  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
2546  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
2547  };
2548  //**********************************************************************************************
2549 
2550  public:
2551  //**Type definitions****************************************************************************
// Public type aliases of the scaled matrix/vector multiplication expression.
// NOTE(review): several listing lines (2552, 2554-2556, 2560, 2563 and the lines after 2564)
// are missing from this view; names such as ElementType, SIMDType, LT and RT that are used
// elsewhere in this class are presumably declared on those lines.
2553  using ResultType = MultTrait_<RES,ST>; // Result type: MultTrait_ of RES and the scalar type ST
2557  using ReturnType = const ElementType; // Type returned by operator[] and at()
2558  using CompositeType = const ResultType; // Composite type: a constant ResultType
2559 
2561  using LeftOperand = const TDMatDVecMultExpr<MT,VT>; // Type of the left-hand side (vector expression) operand
2562 
2564  using RightOperand = ST; // Type of the right-hand side scalar operand
2565 
2568 
2571  //**********************************************************************************************
2572 
2573  //**Compilation flags***************************************************************************
// Compilation flag simdEnabled. Visible conditions: MT must not be diagonal and both MT and
// VT must be SIMD enabled.
// NOTE(review): listing lines 2577-2579 (the rest of this condition) are missing here.
2575  enum : bool { simdEnabled = !IsDiagonal<MT>::value &&
2576  MT::simdEnabled && VT::simdEnabled &&
2580 
// Compilation flag smpAssignable: true only if neither operand is flagged for evaluation and
// both MT and VT report themselves as SMP assignable.
2582  enum : bool { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2583  !evaluateVector && VT::smpAssignable };
2584  //**********************************************************************************************
2585 
2586  //**SIMD properties*****************************************************************************
// SIMDSIZE is taken from SIMDTrait<ElementType>::size; the vectorized kernels below use it
// as the stride (in elements) between consecutive SIMD loads and stores.
2588  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
2589  //**********************************************************************************************
2590 
2591  //**Constructor*********************************************************************************
// Constructor: stores the given matrix/vector multiplication expression and the scalar.
// \param vector The left-hand side matrix/vector multiplication expression.
// \param scalar The right-hand side scalar of the scaling.
2597  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2598  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2599  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2600  {}
2601  //**********************************************************************************************
2602 
2603  //**Subscript operator**************************************************************************
// Subscript operator: returns element 'index' of the wrapped vector expression multiplied
// by the scalar. The index is only checked by an internal assertion against vector_.size().
// \param index Access index.
// \return vector_[index] * scalar_.
2609  inline ReturnType operator[]( size_t index ) const {
2610  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2611  return vector_[index] * scalar_;
2612  }
2613  //**********************************************************************************************
2614 
2615  //**At function*********************************************************************************
// Checked element access: if 'index' is not smaller than vector_.size(), the error is
// reported through BLAZE_THROW_OUT_OF_RANGE; otherwise the call forwards to operator[].
// \param index Access index.
// \return The same value as (*this)[index].
2622  inline ReturnType at( size_t index ) const {
2623  if( index >= vector_.size() ) {
2624  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
2625  }
2626  return (*this)[index];
2627  }
2628  //**********************************************************************************************
2629 
2630  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped vector expression.
2635  inline size_t size() const {
2636  return vector_.size();
2637  }
2638  //**********************************************************************************************
2639 
2640  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix/vector multiplication expression.
2645  inline LeftOperand leftOperand() const {
2646  return vector_;
2647  }
2648  //**********************************************************************************************
2649 
2650  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar.
2655  inline RightOperand rightOperand() const {
2656  return scalar_;
2657  }
2658  //**********************************************************************************************
2659 
2660  //**********************************************************************************************
// Aliasing query: forwards the given address to canAlias() of the wrapped vector expression
// and returns its answer unchanged.
// \param alias The address to be checked.
2666  template< typename T >
2667  inline bool canAlias( const T* alias ) const {
2668  return vector_.canAlias( alias );
2669  }
2670  //**********************************************************************************************
2671 
2672  //**********************************************************************************************
// Aliasing query: forwards the given address to isAliased() of the wrapped vector expression
// and returns its answer unchanged.
// \param alias The address to be checked.
2678  template< typename T >
2679  inline bool isAliased( const T* alias ) const {
2680  return vector_.isAliased( alias );
2681  }
2682  //**********************************************************************************************
2683 
2684  //**********************************************************************************************
// Alignment query: returns whatever isAligned() of the wrapped vector expression reports.
2689  inline bool isAligned() const {
2690  return vector_.isAligned();
2691  }
2692  //**********************************************************************************************
2693 
2694  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// The result is true only if size() exceeds SMP_TDMATDVECMULT_THRESHOLD and at least one of
// the alternatives in the first parenthesis holds. The visible alternatives are: BLAS mode is
// disabled, MT is a computation that is not flagged for evaluation, or the number of matrix
// elements (rows * columns) is below TDMATDVECMULT_THRESHOLD.
// NOTE(review): listing lines 2702-2703 (further alternatives) are missing from this view.
2699  inline bool canSMPAssign() const noexcept {
2700  LeftOperand_<MVM> A( vector_.leftOperand() );
2701  return ( !BLAZE_BLAS_MODE ||
2704  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2705  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2706  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
2707  }
2708  //**********************************************************************************************
2709 
2710  private:
2711  //**Member variables****************************************************************************
2712  LeftOperand vector_; // Left-hand side operand: the matrix/vector multiplication expression
2713  RightOperand scalar_; // Right-hand side operand: the scalar of the scaling
2714  //**********************************************************************************************
2715 
2716  //**Assignment to dense vectors*****************************************************************
// Assignment of the scaled matrix/vector product to a dense target vector.
// \param lhs The target dense vector.
// \param rhs The scaled multiplication expression to be assigned.
// Steps: fetch both operands of the inner product; return immediately for a matrix without
// rows; reset the target and return for a matrix without columns; otherwise evaluate both
// operands serially into LT/RT objects and dispatch to selectAssignKernel() with the scalar.
// NOTE(review): LT/RT are declared outside this view; listing line 2731 is missing here.
2728  template< typename VT1 > // Type of the target dense vector
2729  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2730  {
2732 
2733  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2734 
2735  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
2736  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
2737 
// No rows: the target is left untouched. No columns: the target is reset.
2738  if( left.rows() == 0UL ) {
2739  return;
2740  }
2741  else if( left.columns() == 0UL ) {
2742  reset( ~lhs );
2743  return;
2744  }
2745 
2746  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2747  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2748 
2749  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2750  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2751  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2752  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2753 
2754  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2755  }
2756  //**********************************************************************************************
2757 
2758  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment y = A * x * scalar.
// The small-matrix kernel is chosen if MT1 is diagonal, if MT is a computation that is not
// flagged for evaluation, or if the number of matrix elements (rows * columns) is below
// TDMATDVECMULT_THRESHOLD. In every other case the BLAS kernel entry point is called.
// \param y The target vector.
// \param A The matrix operand.
// \param x The vector operand.
// \param scalar The scaling factor.
2769  template< typename VT1 // Type of the left-hand side target vector
2770  , typename MT1 // Type of the left-hand side matrix operand
2771  , typename VT2 // Type of the right-hand side vector operand
2772  , typename ST2 > // Type of the scalar value
2773  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2774  {
2775  if( ( IsDiagonal<MT1>::value ) ||
2776  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2777  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2778  selectSmallAssignKernel( y, A, x, scalar );
2779  else
2780  selectBlasAssignKernel( y, A, x, scalar );
2781  }
2782  //**********************************************************************************************
2783 
2784  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for the assignment y = A * x * scalar.
// The product is accumulated column by column: the first column initializes y (unless MT1 is
// upper), every further column j adds A(i,j) * x[j] to the affected entries of y, and the
// scalar is applied at the end (or directly in the branch that writes y[j] from A(j,j)).
// The row range per column is narrowed with the IsLower/IsUpper/IsStrictly* traits of MT1.
// \param y The target vector.
// \param A The matrix operand.
// \param x The vector operand.
// \param scalar The scaling factor.
// NOTE(review): listing lines 2807, 2820 and 2850 are missing from this view. They presumably
// hold the 'if( ... ) {' conditions guarding the code that follows them; the comments below
// mark those spots - confirm the conditions against the original header.
2798  template< typename VT1 // Type of the left-hand side target vector
2799  , typename MT1 // Type of the left-hand side matrix operand
2800  , typename VT2 // Type of the right-hand side vector operand
2801  , typename ST2 > // Type of the scalar value
2802  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2803  {
2804  const size_t M( A.rows() );
2805  const size_t N( A.columns() );
2806 
// (condition on missing line 2807) reset of the first element of y
2808  reset( y[0] );
2809  }
2810 
// Initialization of y from the first column; skipped entirely for upper matrices.
2811  if( !IsUpper<MT1>::value )
2812  {
2813  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
2814  y[i] = A(i,0UL) * x[0UL];
2815  }
2816  }
2817 
// Column loop; starts at column 0 only for upper (but not strictly upper) matrices.
2818  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
2819  {
// (condition on missing line 2820) branch that writes the already scaled diagonal product
2821  {
2822  y[j] = A(j,j) * x[j] * scalar;
2823  }
2824  else
2825  {
2826  const size_t ibegin( ( IsLower<MT1>::value )
2827  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2828  :( 0UL ) );
2829  const size_t iend( ( IsUpper<MT1>::value )
2830  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
2831  :( M ) );
2832  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2833 
// The row range is processed two rows at a time; 'inum & size_t(-2)' clears the lowest
// bit, so ipos marks the end of the even-sized part of the range.
2834  const size_t inum( iend - ibegin );
2835  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2836 
2837  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2838  y[i ] += A(i ,j) * x[j];
2839  y[i+1UL] += A(i+1UL,j) * x[j];
2840  }
// Leftover single row of an odd-sized range.
2841  if( ipos < iend ) {
2842  y[ipos] += A(ipos,j) * x[j];
2843  }
// For upper matrices y[iend] is written here by plain assignment (not accumulation).
2844  if( IsUpper<MT1>::value ) {
2845  y[iend] = A(iend,j) * x[j];
2846  }
2847  }
2848  }
2849 
// (condition on missing line 2850) reset of the last element of y
2851  reset( y[M-1UL] );
2852  }
2853 
// Final scaling pass; not needed for diagonal matrices since that branch above already
// multiplies by the scalar. The index range skips the entries excluded by the strictly
// lower/upper traits.
2854  if( !IsDiagonal<MT1>::value )
2855  {
2856  const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
2857  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
2858  y[i] *= scalar;
2859  }
2860  }
2861  }
2862  //**********************************************************************************************
2863 
2864  //**Default assignment to dense vectors (small matrices)****************************************
// Small-matrix assignment kernel (non-vectorized overload): simply forwards all arguments to
// selectDefaultAssignKernel().
// NOTE(review): listing line 2882 (the declaration specifiers/return type, presumably the
// condition that selects this overload over the vectorized one) is missing from this view.
2878  template< typename VT1 // Type of the left-hand side target vector
2879  , typename MT1 // Type of the left-hand side matrix operand
2880  , typename VT2 // Type of the right-hand side vector operand
2881  , typename ST2 > // Type of the scalar value
2883  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2884  {
2885  selectDefaultAssignKernel( y, A, x, scalar );
2886  }
2887  //**********************************************************************************************
2888 
2889  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Small-matrix assignment kernel (vectorized overload) for y = A * x * scalar.
// Rows are processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors (SIMDSIZE elements each).
// For every chunk the column range [jbegin,jend) is derived from the IsUpper/IsLower/
// IsStrictly* traits of MT1, the products A.load(i,j) * x[j] are accumulated in SIMD
// variables, and the sums are multiplied by the broadcast scalar before being stored to y.
// Rows that do not fill a complete SIMD vector are handled by a scalar loop at the end.
// \param y The target vector.
// \param A The matrix operand.
// \param x The vector operand.
// \param scalar The scaling factor.
// NOTE(review): listing line 2907 (declaration specifiers/return type) is missing here. The
// xmm accumulators are default-constructed and then used with '+='; this relies on SIMDType
// default-initializing to zero, which cannot be verified from this view.
2903  template< typename VT1 // Type of the left-hand side target vector
2904  , typename MT1 // Type of the left-hand side matrix operand
2905  , typename VT2 // Type of the right-hand side vector operand
2906  , typename ST2 > // Type of the scalar value
2908  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2909  {
// A scalar remainder loop is required unless both the matrix and the target are padded.
2910  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
2911 
2912  const size_t M( A.rows() );
2913  const size_t N( A.columns() );
2914 
// With a remainder, ipos is M rounded down to a multiple of SIMDSIZE (checked by the
// assertion on the next line); otherwise the SIMD loops run up to M.
2915  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
2916  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
2917 
// The scalar broadcast into a SIMD vector.
2918  const SIMDType factor( set( scalar ) );
2919 
2920  size_t i( 0UL );
2921 
// Chunks of 8 SIMD vectors.
2922  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
2923  {
2924  const size_t jbegin( ( IsUpper<MT1>::value )
2925  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2926  :( 0UL ) );
2927  const size_t jend( ( IsLower<MT1>::value )
2928  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2929  :( N ) );
2930  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2931 
2932  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2933 
2934  for( size_t j=jbegin; j<jend; ++j ) {
2935  const SIMDType x1( set( x[j] ) );
2936  xmm1 += A.load(i ,j) * x1;
2937  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2938  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2939  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2940  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
2941  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
2942  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
2943  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
2944  }
2945 
2946  y.store( i , xmm1*factor );
2947  y.store( i+SIMDSIZE , xmm2*factor );
2948  y.store( i+SIMDSIZE*2UL, xmm3*factor );
2949  y.store( i+SIMDSIZE*3UL, xmm4*factor );
2950  y.store( i+SIMDSIZE*4UL, xmm5*factor );
2951  y.store( i+SIMDSIZE*5UL, xmm6*factor );
2952  y.store( i+SIMDSIZE*6UL, xmm7*factor );
2953  y.store( i+SIMDSIZE*7UL, xmm8*factor );
2954  }
2955 
// Chunks of 4 SIMD vectors.
2956  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
2957  {
2958  const size_t jbegin( ( IsUpper<MT1>::value )
2959  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2960  :( 0UL ) );
2961  const size_t jend( ( IsLower<MT1>::value )
2962  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2963  :( N ) );
2964  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2965 
2966  SIMDType xmm1, xmm2, xmm3, xmm4;
2967 
2968  for( size_t j=jbegin; j<jend; ++j ) {
2969  const SIMDType x1( set( x[j] ) );
2970  xmm1 += A.load(i ,j) * x1;
2971  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2972  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2973  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
2974  }
2975 
2976  y.store( i , xmm1*factor );
2977  y.store( i+SIMDSIZE , xmm2*factor );
2978  y.store( i+SIMDSIZE*2UL, xmm3*factor );
2979  y.store( i+SIMDSIZE*3UL, xmm4*factor );
2980  }
2981 
// Chunks of 3 SIMD vectors.
2982  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
2983  {
2984  const size_t jbegin( ( IsUpper<MT1>::value )
2985  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2986  :( 0UL ) );
2987  const size_t jend( ( IsLower<MT1>::value )
2988  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2989  :( N ) );
2990  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2991 
2992  SIMDType xmm1, xmm2, xmm3;
2993 
2994  for( size_t j=jbegin; j<jend; ++j ) {
2995  const SIMDType x1( set( x[j] ) );
2996  xmm1 += A.load(i ,j) * x1;
2997  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
2998  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
2999  }
3000 
3001  y.store( i , xmm1*factor );
3002  y.store( i+SIMDSIZE , xmm2*factor );
3003  y.store( i+SIMDSIZE*2UL, xmm3*factor );
3004  }
3005 
// Chunks of 2 SIMD vectors.
3006  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3007  {
3008  const size_t jbegin( ( IsUpper<MT1>::value )
3009  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3010  :( 0UL ) );
3011  const size_t jend( ( IsLower<MT1>::value )
3012  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3013  :( N ) );
3014  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3015 
3016  SIMDType xmm1, xmm2;
3017 
3018  for( size_t j=jbegin; j<jend; ++j ) {
3019  const SIMDType x1( set( x[j] ) );
3020  xmm1 += A.load(i ,j) * x1;
3021  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3022  }
3023 
3024  y.store( i , xmm1*factor );
3025  y.store( i+SIMDSIZE, xmm2*factor );
3026  }
3027 
// Single SIMD vectors.
3028  for( ; i<ipos; i+=SIMDSIZE )
3029  {
3030  const size_t jbegin( ( IsUpper<MT1>::value )
3031  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3032  :( 0UL ) );
3033  const size_t jend( ( IsLower<MT1>::value )
3034  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3035  :( N ) );
3036  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3037 
3038  SIMDType xmm1;
3039 
3040  for( size_t j=jbegin; j<jend; ++j ) {
3041  const SIMDType x1( set( x[j] ) );
3042  xmm1 += A.load(i,j) * x1;
3043  }
3044 
3045  y.store( i, xmm1*factor );
3046  }
3047 
// Scalar loop for the rows beyond ipos; only active when 'remainder' is true.
3048  for( ; remainder && i<M; ++i )
3049  {
3050  const size_t jbegin( ( IsUpper<MT1>::value )
3051  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3052  :( 0UL ) );
3053  const size_t jend( ( IsLower<MT1>::value )
3054  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3055  :( N ) );
3056  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3057 
3058  ElementType value = ElementType();
3059 
3060  for( size_t j=jbegin; j<jend; ++j ) {
3061  value += A(i,j) * x[j];
3062  }
3063 
3064  y[i] = value * scalar;
3065  }
3066  }
3067  //**********************************************************************************************
3068 
3069  //**Default assignment to dense vectors (large matrices)****************************************
// Large-matrix assignment kernel (non-vectorized overload): simply forwards all arguments to
// selectDefaultAssignKernel().
// NOTE(review): listing line 3087 (the declaration specifiers/return type, presumably the
// condition that selects this overload over the vectorized one) is missing from this view.
3083  template< typename VT1 // Type of the left-hand side target vector
3084  , typename MT1 // Type of the left-hand side matrix operand
3085  , typename VT2 // Type of the right-hand side vector operand
3086  , typename ST2 > // Type of the scalar value
3088  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3089  {
3090  selectDefaultAssignKernel( y, A, x, scalar );
3091  }
3092  //**********************************************************************************************
3093 
3094  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Large-matrix assignment kernel (vectorized overload) for y = A * x * scalar.
// The target is reset first. The matrix is then traversed in tiles of iblock rows by jblock
// columns; within a tile the rows are processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors,
// and each chunk adds its scaled partial sums to the values already stored in y. Rows that
// do not fill a complete SIMD vector are handled by a scalar loop per tile.
// \param y The target vector.
// \param A The matrix operand.
// \param x The vector operand.
// \param scalar The scaling factor.
// NOTE(review): listing line 3112 (declaration specifiers/return type) is missing here. The
// xmm accumulators are default-constructed and then used with '+='; this relies on SIMDType
// default-initializing to zero, which cannot be verified from this view.
3108  template< typename VT1 // Type of the left-hand side target vector
3109  , typename MT1 // Type of the left-hand side matrix operand
3110  , typename VT2 // Type of the right-hand side vector operand
3111  , typename ST2 > // Type of the scalar value
3113  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3114  {
// A scalar remainder loop is required unless both the matrix and the target are padded.
3115  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3116 
3117  const size_t M( A.rows() );
3118  const size_t N( A.columns() );
3119 
// Tile sizes: iblock is 32768 bytes worth of elements; jblock is 8 columns when N is
// smaller than iblock and 4 columns otherwise.
3120  const size_t iblock( 32768UL / sizeof( ElementType ) );
3121  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3122 
3123  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
3124 
// The scalar broadcast into a SIMD vector.
3125  const SIMDType factor( set( scalar ) );
3126 
// y is accumulated into by every tile, so it has to start from the reset state.
3127  reset( y );
3128 
// NOTE(review): 'ii' is initialized with 0U while the other indices in this function use 0UL.
3129  for( size_t ii=0U; ii<M; ii+=iblock ) {
3130  for( size_t jj=0UL; jj<N; jj+=jblock )
3131  {
3132  const size_t jend( min( jj+jblock, N ) );
3133  const size_t itmp( min( ii+iblock, M ) );
// For upper matrices the row range of the tile is cut off at the tile's last column.
3134  const size_t iend( ( IsUpper<MT1>::value )
3135  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3136  :( itmp ) );
3137 
3138  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
3139  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3140 
// For lower matrices the first row is moved forward to the tile's first column, rounded
// down to a multiple of SIMDSIZE, but never before the start of the row block.
3141  size_t i( ( IsLower<MT1>::value )
3142  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
3143  :( ii ) );
3144 
// Chunks of 8 SIMD vectors.
3145  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3146  {
3147  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3148 
3149  for( size_t j=jj; j<jend; ++j ) {
3150  const SIMDType x1( set( x[j] ) );
3151  xmm1 += A.load(i ,j) * x1;
3152  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3153  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3154  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3155  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3156  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3157  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3158  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3159  }
3160 
3161  y.store( i , y.load(i ) + xmm1*factor );
3162  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3163  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3164  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3165  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3166  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3167  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3168  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3169  }
3170 
// Chunks of 4 SIMD vectors.
3171  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3172  {
3173  SIMDType xmm1, xmm2, xmm3, xmm4;
3174 
3175  for( size_t j=jj; j<jend; ++j ) {
3176  const SIMDType x1( set( x[j] ) );
3177  xmm1 += A.load(i ,j) * x1;
3178  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3179  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3180  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3181  }
3182 
3183  y.store( i , y.load(i ) + xmm1*factor );
3184  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3185  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3186  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3187  }
3188 
// Chunks of 3 SIMD vectors.
3189  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3190  {
3191  SIMDType xmm1, xmm2, xmm3;
3192 
3193  for( size_t j=jj; j<jend; ++j ) {
3194  const SIMDType x1( set( x[j] ) );
3195  xmm1 += A.load(i ,j) * x1;
3196  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3197  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3198  }
3199 
3200  y.store( i , y.load(i ) + xmm1*factor );
3201  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3202  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3203  }
3204 
// Chunks of 2 SIMD vectors.
3205  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3206  {
3207  SIMDType xmm1, xmm2;
3208 
3209  for( size_t j=jj; j<jend; ++j ) {
3210  const SIMDType x1( set( x[j] ) );
3211  xmm1 += A.load(i ,j) * x1;
3212  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3213  }
3214 
3215  y.store( i , y.load(i ) + xmm1*factor );
3216  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3217  }
3218 
// Single SIMD vectors.
3219  for( ; i<ipos; i+=SIMDSIZE )
3220  {
3221  SIMDType xmm1;
3222 
3223  for( size_t j=jj; j<jend; ++j ) {
3224  xmm1 += A.load(i,j) * set( x[j] );
3225  }
3226 
3227  y.store( i, y.load(i) + xmm1*factor );
3228  }
3229 
// Scalar loop for the rows beyond ipos; only active when 'remainder' is true.
3230  for( ; remainder && i<iend; ++i )
3231  {
3232  ElementType value = ElementType();
3233 
3234  for( size_t j=jj; j<jend; ++j ) {
3235  value += A(i,j) * x[j];
3236  }
3237 
3238  y[i] += value * scalar;
3239  }
3240  }
3241  }
3242  }
3243  //**********************************************************************************************
3244 
3245  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel entry point (fallback overload): forwards all arguments to
// selectLargeAssignKernel() instead of calling a BLAS routine.
// NOTE(review): listing line 3263 (the declaration specifiers/return type, presumably the
// condition that selects this overload over the BLAS-based one) is missing from this view.
3259  template< typename VT1 // Type of the left-hand side target vector
3260  , typename MT1 // Type of the left-hand side matrix operand
3261  , typename VT2 // Type of the right-hand side vector operand
3262  , typename ST2 > // Type of the scalar value
3264  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3265  {
3266  selectLargeAssignKernel( y, A, x, scalar );
3267  }
3268  //**********************************************************************************************
3269 
3270  //**BLAS-based assignment to dense vectors******************************************************
// BLAS-based assignment kernel for y = A * x * scalar. Only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are enabled.
// Triangular matrices: y is first assigned scalar * x and then passed to trmv() together
// with A and the matching lower/upper flag. All other matrices: gemv() is called with
// ET(scalar) and ET(0) as its two scalar arguments.
// NOTE(review): presumably trmv() multiplies y in place by the triangular matrix and the
// gemv() scalars are the BLAS alpha/beta factors - confirm against the Blaze BLAS wrappers.
// Listing line 3289 (declaration specifiers/return type) is missing from this view.
3271 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
3272 
3285  template< typename VT1 // Type of the left-hand side target vector
3286  , typename MT1 // Type of the left-hand side matrix operand
3287  , typename VT2 // Type of the right-hand side vector operand
3288  , typename ST2 > // Type of the scalar value
3290  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3291  {
// The scalar is converted to the element type of the target vector for the gemv() call.
3292  using ET = ElementType_<VT1>;
3293 
3294  if( IsTriangular<MT1>::value ) {
3295  assign( y, scalar * x );
3296  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3297  }
3298  else {
3299  gemv( y, A, x, ET(scalar), ET(0) );
3300  }
3301  }
3302 #endif
3303  //**********************************************************************************************
3304 
3305  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product to a sparse target vector.
// The expression is evaluated serially into a temporary of type ResultType, and that
// temporary is then assigned to the target.
// \param lhs The target sparse vector.
// \param rhs The scaled multiplication expression to be assigned.
// NOTE(review): listing lines 3320 and 3322-3324 (presumably tracing/constraint macros) are
// missing from this view.
3317  template< typename VT1 > // Type of the target sparse vector
3318  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3319  {
3321 
3325 
3326  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3327 
3328  const ResultType tmp( serial( rhs ) );
3329  assign( ~lhs, tmp );
3330  }
3331  //**********************************************************************************************
3332 
3333  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product to a dense target vector.
// \param lhs The target dense vector.
// \param rhs The scaled multiplication expression to be added.
// Steps: fetch both operands of the inner product; return without touching the target if the
// matrix has zero rows or zero columns; otherwise evaluate both operands serially into LT/RT
// objects and dispatch to selectAddAssignKernel() with the scalar.
// NOTE(review): LT/RT are declared outside this view; listing line 3348 is missing here.
3345  template< typename VT1 > // Type of the target dense vector
3346  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3347  {
3349 
3350  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3351 
3352  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
3353  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
3354 
// Nothing is added when the matrix operand is empty in either dimension.
3355  if( left.rows() == 0UL || left.columns() == 0UL ) {
3356  return;
3357  }
3358 
3359  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3360  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3361 
3362  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3363  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3364  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3365  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3366 
3367  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3368  }
3369  //**********************************************************************************************
3370 
3371  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment y += A * x * scalar.
// The small kernel is used when MT1 is diagonal, when the class-level matrix type MT is a
// computation and evaluateMatrix is false, or when rows*columns of A is below
// TDMATDVECMULT_THRESHOLD. In all other cases the BLAS kernel selector is called.
// Note that the second condition tests MT (the expression's matrix type), not MT1.
3382  template< typename VT1 // Type of the left-hand side target vector
3383  , typename MT1 // Type of the left-hand side matrix operand
3384  , typename VT2 // Type of the right-hand side vector operand
3385  , typename ST2 > // Type of the scalar value
3386  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3387  {
3388  if( ( IsDiagonal<MT1>::value ) ||
3389  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3390  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3391  selectSmallAddAssignKernel( y, A, x, scalar );
3392  else
3393  selectBlasAddAssignKernel( y, A, x, scalar );
3394  }
3395  //**********************************************************************************************
3396 
3397  //**Default addition assignment to dense vectors************************************************
// Default addition assignment kernel: forwards the expression A * x * scalar to the
// addAssign() member of the target vector y.
3411  template< typename VT1 // Type of the left-hand side target vector
3412  , typename MT1 // Type of the left-hand side matrix operand
3413  , typename VT2 // Type of the right-hand side vector operand
3414  , typename ST2 > // Type of the scalar value
3415  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3416  {
3417  y.addAssign( A * x * scalar );
3418  }
3419  //**********************************************************************************************
3420 
3421  //**Default addition assignment to dense vectors (small matrices)*******************************
// Non-vectorized small-matrix addition assignment kernel: simply forwards to
// selectDefaultAddAssignKernel().
// NOTE(review): listing line 3439 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition that distinguishes this
// overload from the vectorized one below — confirm against the original header.
3435  template< typename VT1 // Type of the left-hand side target vector
3436  , typename MT1 // Type of the left-hand side matrix operand
3437  , typename VT2 // Type of the right-hand side vector operand
3438  , typename ST2 > // Type of the scalar value
3440  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3441  {
3442  selectDefaultAddAssignKernel( y, A, x, scalar );
3443  }
3444  //**********************************************************************************************
3445 
3446  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized small-matrix kernel for y += A * x * scalar.
// The rows of A (and the matching elements of y) are processed in chunks of 8, 4, 3, 2 and 1
// SIMD vectors. For each chunk the products A.load(i,j) * set(x[j]) are accumulated over a
// column range [jbegin,jend), the accumulated values are multiplied by the scalar factor and
// added to the values loaded from y, and the result is stored back into y. A scalar loop
// handles the rows that are left over when a remainder is required.
// NOTE(review): listing line 3465 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
// NOTE(review): the xmm accumulators are only default-constructed before being used with +=;
// this presumably relies on SIMDType default construction yielding zero — confirm.
3461  template< typename VT1 // Type of the left-hand side target vector
3462  , typename MT1 // Type of the left-hand side matrix operand
3463  , typename VT2 // Type of the right-hand side vector operand
3464  , typename ST2 > // Type of the scalar value
3466  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3467  {
   // A scalar tail loop is required unless both the matrix and the target vector are padded.
3468  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3469 
3470  const size_t M( A.rows() );
3471  const size_t N( A.columns() );
3472 
   // End of the SIMD-processed row range: M itself without remainder handling, otherwise M
   // reduced to a multiple of SIMDSIZE (checked by the assertion below).
3473  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
3474  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3475 
   // SIMD value created from the scalar; applied once per chunk when writing back to y.
3476  const SIMDType factor( set( scalar ) );
3477 
3478  size_t i( 0UL );
3479 
   // Chunks of eight SIMD vectors.
3480  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3481  {
   // Column range of the chunk: for upper matrices it starts at column i (i+1 if strictly
   // upper); for lower matrices it ends at the end of the row chunk, limited by N (one less
   // if strictly lower). Otherwise all N columns are traversed.
3482  const size_t jbegin( ( IsUpper<MT1>::value )
3483  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3484  :( 0UL ) );
3485  const size_t jend( ( IsLower<MT1>::value )
3486  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3487  :( N ) );
3488  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3489 
3490  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3491 
3492  for( size_t j=jbegin; j<jend; ++j ) {
3493  const SIMDType x1( set( x[j] ) );
3494  xmm1 += A.load(i ,j) * x1;
3495  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3496  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3497  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3498  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3499  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3500  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3501  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3502  }
3503 
   // Scale the accumulated products and add them to the current content of y.
3504  y.store( i , y.load(i ) + xmm1*factor );
3505  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3506  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3507  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3508  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3509  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3510  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3511  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3512  }
3513 
   // Chunks of four SIMD vectors.
3514  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3515  {
3516  const size_t jbegin( ( IsUpper<MT1>::value )
3517  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3518  :( 0UL ) );
3519  const size_t jend( ( IsLower<MT1>::value )
3520  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3521  :( N ) );
3522  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3523 
3524  SIMDType xmm1, xmm2, xmm3, xmm4;
3525 
3526  for( size_t j=jbegin; j<jend; ++j ) {
3527  const SIMDType x1( set( x[j] ) );
3528  xmm1 += A.load(i ,j) * x1;
3529  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3530  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3531  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3532  }
3533 
3534  y.store( i , y.load(i ) + xmm1*factor );
3535  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3536  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3537  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3538  }
3539 
   // Chunks of three SIMD vectors.
3540  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3541  {
3542  const size_t jbegin( ( IsUpper<MT1>::value )
3543  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3544  :( 0UL ) );
3545  const size_t jend( ( IsLower<MT1>::value )
3546  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3547  :( N ) );
3548  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3549 
3550  SIMDType xmm1, xmm2, xmm3;
3551 
3552  for( size_t j=jbegin; j<jend; ++j ) {
3553  const SIMDType x1( set( x[j] ) );
3554  xmm1 += A.load(i ,j) * x1;
3555  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3556  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3557  }
3558 
3559  y.store( i , y.load(i ) + xmm1*factor );
3560  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3561  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3562  }
3563 
   // Chunks of two SIMD vectors.
3564  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3565  {
3566  const size_t jbegin( ( IsUpper<MT1>::value )
3567  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3568  :( 0UL ) );
3569  const size_t jend( ( IsLower<MT1>::value )
3570  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3571  :( N ) );
3572  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3573 
3574  SIMDType xmm1, xmm2;
3575 
3576  for( size_t j=jbegin; j<jend; ++j ) {
3577  const SIMDType x1( set( x[j] ) );
3578  xmm1 += A.load(i ,j) * x1;
3579  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3580  }
3581 
3582  y.store( i , y.load(i ) + xmm1*factor );
3583  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3584  }
3585 
   // Single SIMD vectors.
3586  for( ; i<ipos; i+=SIMDSIZE )
3587  {
3588  const size_t jbegin( ( IsUpper<MT1>::value )
3589  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3590  :( 0UL ) );
3591  const size_t jend( ( IsLower<MT1>::value )
3592  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3593  :( N ) );
3594  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3595 
3596  SIMDType xmm1;
3597 
3598  for( size_t j=jbegin; j<jend; ++j ) {
3599  xmm1 += A.load(i,j) * set( x[j] );
3600  }
3601 
3602  y.store( i, y.load(i) + xmm1*factor );
3603  }
3604 
   // Scalar tail: element-wise processing of the rows in [ipos,M), only if a remainder exists.
3605  for( ; remainder && i<M; ++i )
3606  {
3607  const size_t jbegin( ( IsUpper<MT1>::value )
3608  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3609  :( 0UL ) );
3610  const size_t jend( ( IsLower<MT1>::value )
3611  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3612  :( N ) );
3613  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3614 
3615  ElementType value = ElementType();
3616 
3617  for( size_t j=jbegin; j<jend; ++j ) {
3618  value += A(i,j) * x[j];
3619  }
3620 
3621  y[i] += value * scalar;
3622  }
3623  }
3624  //**********************************************************************************************
3625 
3626  //**Default addition assignment to dense vectors (large matrices)*******************************
// Non-vectorized large-matrix addition assignment kernel: simply forwards to
// selectDefaultAddAssignKernel().
// NOTE(review): listing line 3644 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
3640  template< typename VT1 // Type of the left-hand side target vector
3641  , typename MT1 // Type of the left-hand side matrix operand
3642  , typename VT2 // Type of the right-hand side vector operand
3643  , typename ST2 > // Type of the scalar value
3645  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3646  {
3647  selectDefaultAddAssignKernel( y, A, x, scalar );
3648  }
3649  //**********************************************************************************************
3650 
3651  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized large-matrix kernel for y += A * x * scalar.
// The matrix is traversed in blocks of iblock rows by jblock columns. Within each block the
// rows are processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors: the products
// A.load(i,j) * set(x[j]) are accumulated over the block's columns [jj,jend), scaled by the
// factor and added to the values loaded from y. Each column block therefore contributes a
// partial sum to the same elements of y. A scalar loop handles leftover rows.
// NOTE(review): listing line 3670 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
// NOTE(review): the xmm accumulators are only default-constructed before being used with +=;
// this presumably relies on SIMDType default construction yielding zero — confirm.
3666  template< typename VT1 // Type of the left-hand side target vector
3667  , typename MT1 // Type of the left-hand side matrix operand
3668  , typename VT2 // Type of the right-hand side vector operand
3669  , typename ST2 > // Type of the scalar value
3671  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3672  {
   // A scalar tail loop is required unless both the matrix and the target vector are padded.
3673  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3674 
3675  const size_t M( A.rows() );
3676  const size_t N( A.columns() );
3677 
   // Block sizes: the row block covers 32768 bytes worth of elements; the column block is
   // 8 columns wide if N is smaller than iblock and 4 columns wide otherwise.
3678  const size_t iblock( 32768UL / sizeof( ElementType ) );
3679  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3680 
3681  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
3682 
   // SIMD value created from the scalar; applied when writing back to y.
3683  const SIMDType factor( set( scalar ) );
3684 
3685  for( size_t ii=0U; ii<M; ii+=iblock ) {
3686  for( size_t jj=0UL; jj<N; jj+=jblock )
3687  {
3688  const size_t jend( min( jj+jblock, N ) );
3689  const size_t itmp( min( ii+iblock, M ) );
   // For upper matrices the row range additionally ends at jend (jend-1 if strictly upper).
3690  const size_t iend( ( IsUpper<MT1>::value )
3691  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3692  :( itmp ) );
3693 
   // End of the SIMD-processed rows of this block (see the assertion for the exact relation).
3694  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
3695  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
3696 
   // For lower matrices the first row is jj (jj+1 if strictly lower) masked with
   // size_t(-SIMDSIZE), but never before ii; otherwise the block starts at row ii.
3697  size_t i( ( IsLower<MT1>::value )
3698  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
3699  :( ii ) );
3700 
   // Chunks of eight SIMD vectors.
3701  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
3702  {
3703  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3704 
3705  for( size_t j=jj; j<jend; ++j ) {
3706  const SIMDType x1( set( x[j] ) );
3707  xmm1 += A.load(i ,j) * x1;
3708  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3709  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3710  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3711  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
3712  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
3713  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
3714  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
3715  }
3716 
3717  y.store( i , y.load(i ) + xmm1*factor );
3718  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3719  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3720  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3721  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) + xmm5*factor );
3722  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) + xmm6*factor );
3723  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) + xmm7*factor );
3724  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) + xmm8*factor );
3725  }
3726 
   // Chunks of four SIMD vectors.
3727  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3728  {
3729  SIMDType xmm1, xmm2, xmm3, xmm4;
3730 
3731  for( size_t j=jj; j<jend; ++j ) {
3732  const SIMDType x1( set( x[j] ) );
3733  xmm1 += A.load(i ,j) * x1;
3734  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3735  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3736  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
3737  }
3738 
3739  y.store( i , y.load(i ) + xmm1*factor );
3740  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3741  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3742  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) + xmm4*factor );
3743  }
3744 
   // Chunks of three SIMD vectors.
3745  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3746  {
3747  SIMDType xmm1, xmm2, xmm3;
3748 
3749  for( size_t j=jj; j<jend; ++j ) {
3750  const SIMDType x1( set( x[j] ) );
3751  xmm1 += A.load(i ,j) * x1;
3752  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
3753  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
3754  }
3755 
3756  y.store( i , y.load(i ) + xmm1*factor );
3757  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) + xmm2*factor );
3758  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) + xmm3*factor );
3759  }
3760 
   // Chunks of two SIMD vectors.
3761  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3762  {
3763  SIMDType xmm1, xmm2;
3764 
3765  for( size_t j=jj; j<jend; ++j ) {
3766  const SIMDType x1( set( x[j] ) );
3767  xmm1 += A.load(i ,j) * x1;
3768  xmm2 += A.load(i+SIMDSIZE,j) * x1;
3769  }
3770 
3771  y.store( i , y.load(i ) + xmm1*factor );
3772  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) + xmm2*factor );
3773  }
3774 
   // Single SIMD vectors.
3775  for( ; i<ipos; i+=SIMDSIZE )
3776  {
3777  SIMDType xmm1;
3778 
3779  for( size_t j=jj; j<jend; ++j ) {
3780  xmm1 += A.load(i,j) * set( x[j] );
3781  }
3782 
3783  y.store( i, y.load(i) + xmm1*factor );
3784  }
3785 
   // Scalar tail for the rows in [ipos,iend), only if a remainder exists.
3786  for( ; remainder && i<iend; ++i )
3787  {
3788  ElementType value = ElementType();
3789 
3790  for( size_t j=jj; j<jend; ++j ) {
3791  value += A(i,j) * x[j];
3792  }
3793 
3794  y[i] += value * scalar;
3795  }
3796  }
3797  }
3798  }
3799  //**********************************************************************************************
3800 
3801  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Default (non-BLAS) variant of the BLAS addition assignment kernel: forwards to
// selectLargeAddAssignKernel().
// NOTE(review): listing line 3819 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
3815  template< typename VT1 // Type of the left-hand side target vector
3816  , typename MT1 // Type of the left-hand side matrix operand
3817  , typename VT2 // Type of the right-hand side vector operand
3818  , typename ST2 > // Type of the scalar value
3820  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3821  {
3822  selectLargeAddAssignKernel( y, A, x, scalar );
3823  }
3824  //**********************************************************************************************
3825 
3826  //**BLAS-based addition assignment to dense vectors*********************************************
// BLAS-based addition assignment kernel; only compiled when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are enabled.
// Triangular matrices: a temporary holding scalar * x is created, trmv() is applied to it with
// the lower/upper flag chosen from IsLower<MT1>, and the temporary is added to y.
// All other matrices: gemv() is called with ET(scalar) and ET(1) as the two scalar arguments
// (presumably alpha and beta, so that the product is accumulated into y — confirm against the
// gemv wrapper).
// NOTE(review): listing line 3845 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
3827 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
3828 
3841  template< typename VT1 // Type of the left-hand side target vector
3842  , typename MT1 // Type of the left-hand side matrix operand
3843  , typename VT2 // Type of the right-hand side vector operand
3844  , typename ST2 > // Type of the scalar value
3846  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3847  {
3848  using ET = ElementType_<VT1>;
3849 
3850  if( IsTriangular<MT1>::value ) {
3851  ResultType_<VT1> tmp( serial( scalar * x ) );
3852  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3853  addAssign( y, tmp );
3854  }
3855  else {
3856  gemv( y, A, x, ET(scalar), ET(1) );
3857  }
3858  }
3859 #endif
3860  //**********************************************************************************************
3861 
3862  //**Addition assignment to sparse vectors*******************************************************
3863  // No special implementation for the addition assignment to sparse vectors.
3864  //**********************************************************************************************
3865 
3866  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled dense matrix/dense vector product to a dense vector.
// The matrix and vector operands are taken from rhs.vector_, evaluated serially into A and x,
// and the actual computation is delegated to selectSubAssignKernel() with rhs.scalar_.
// NOTE(review): listing line 3881 is missing between the opening brace and the first assertion.
3878  template< typename VT1 > // Type of the target dense vector
3879  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3880  {
3882 
3883  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3884 
3885  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
3886  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
3887 
   // Nothing is subtracted when the matrix operand has no rows or no columns.
3888  if( left.rows() == 0UL || left.columns() == 0UL ) {
3889  return;
3890  }
3891 
3892  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3893  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3894 
3895  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3896  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3897  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3898  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3899 
3900  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3901  }
3902  //**********************************************************************************************
3903 
3904  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment y -= A * x * scalar.
// The small kernel is used when MT1 is diagonal, when the class-level matrix type MT is a
// computation and evaluateMatrix is false, or when rows*columns of A is below
// TDMATDVECMULT_THRESHOLD. In all other cases the BLAS kernel selector is called.
// Note that the second condition tests MT (the expression's matrix type), not MT1.
3915  template< typename VT1 // Type of the left-hand side target vector
3916  , typename MT1 // Type of the left-hand side matrix operand
3917  , typename VT2 // Type of the right-hand side vector operand
3918  , typename ST2 > // Type of the scalar value
3919  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3920  {
3921  if( ( IsDiagonal<MT1>::value ) ||
3922  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3923  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3924  selectSmallSubAssignKernel( y, A, x, scalar );
3925  else
3926  selectBlasSubAssignKernel( y, A, x, scalar );
3927  }
3928  //**********************************************************************************************
3929 
3930  //**Default subtraction assignment to dense vectors*********************************************
// Default subtraction assignment kernel: forwards the expression A * x * scalar to the
// subAssign() member of the target vector y.
3944  template< typename VT1 // Type of the left-hand side target vector
3945  , typename MT1 // Type of the left-hand side matrix operand
3946  , typename VT2 // Type of the right-hand side vector operand
3947  , typename ST2 > // Type of the scalar value
3948  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3949  {
3950  y.subAssign( A * x * scalar );
3951  }
3952  //**********************************************************************************************
3953 
3954  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Non-vectorized small-matrix subtraction assignment kernel: simply forwards to
// selectDefaultSubAssignKernel().
// NOTE(review): listing line 3972 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition that distinguishes this
// overload from the vectorized one below — confirm against the original header.
3968  template< typename VT1 // Type of the left-hand side target vector
3969  , typename MT1 // Type of the left-hand side matrix operand
3970  , typename VT2 // Type of the right-hand side vector operand
3971  , typename ST2 > // Type of the scalar value
3973  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3974  {
3975  selectDefaultSubAssignKernel( y, A, x, scalar );
3976  }
3977  //**********************************************************************************************
3978 
3979  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized small-matrix kernel for y -= A * x * scalar.
// The rows of A (and the matching elements of y) are processed in chunks of 8, 4, 3, 2 and 1
// SIMD vectors. For each chunk the products A.load(i,j) * set(x[j]) are accumulated over a
// column range [jbegin,jend), the accumulated values are multiplied by the scalar factor and
// subtracted from the values loaded from y, and the result is stored back into y. A scalar
// loop handles the rows that are left over when a remainder is required.
// NOTE(review): listing line 3998 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
// NOTE(review): the xmm accumulators are only default-constructed before being used with +=;
// this presumably relies on SIMDType default construction yielding zero — confirm.
3994  template< typename VT1 // Type of the left-hand side target vector
3995  , typename MT1 // Type of the left-hand side matrix operand
3996  , typename VT2 // Type of the right-hand side vector operand
3997  , typename ST2 > // Type of the scalar value
3999  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4000  {
   // A scalar tail loop is required unless both the matrix and the target vector are padded.
4001  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4002 
4003  const size_t M( A.rows() );
4004  const size_t N( A.columns() );
4005 
   // End of the SIMD-processed row range: M itself without remainder handling, otherwise M
   // reduced to a multiple of SIMDSIZE (checked by the assertion below).
4006  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
4007  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
4008 
   // SIMD value created from the scalar; applied once per chunk when writing back to y.
4009  const SIMDType factor( set( scalar ) );
4010 
4011  size_t i( 0UL );
4012 
   // Chunks of eight SIMD vectors.
4013  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
4014  {
   // Column range of the chunk: for upper matrices it starts at column i (i+1 if strictly
   // upper); for lower matrices it ends at the end of the row chunk, limited by N (one less
   // if strictly lower). Otherwise all N columns are traversed.
4015  const size_t jbegin( ( IsUpper<MT1>::value )
4016  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4017  :( 0UL ) );
4018  const size_t jend( ( IsLower<MT1>::value )
4019  ?( min( i+SIMDSIZE*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4020  :( N ) );
4021  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4022 
4023  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4024 
4025  for( size_t j=jbegin; j<jend; ++j ) {
4026  const SIMDType x1( set( x[j] ) );
4027  xmm1 += A.load(i ,j) * x1;
4028  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4029  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4030  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4031  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
4032  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
4033  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
4034  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
4035  }
4036 
   // Scale the accumulated products and subtract them from the current content of y.
4037  y.store( i , y.load(i ) - xmm1*factor );
4038  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4039  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4040  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4041  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5*factor );
4042  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6*factor );
4043  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7*factor );
4044  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8*factor );
4045  }
4046 
   // Chunks of four SIMD vectors.
4047  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
4048  {
4049  const size_t jbegin( ( IsUpper<MT1>::value )
4050  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4051  :( 0UL ) );
4052  const size_t jend( ( IsLower<MT1>::value )
4053  ?( min( i+SIMDSIZE*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4054  :( N ) );
4055  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4056 
4057  SIMDType xmm1, xmm2, xmm3, xmm4;
4058 
4059  for( size_t j=jbegin; j<jend; ++j ) {
4060  const SIMDType x1( set( x[j] ) );
4061  xmm1 += A.load(i ,j) * x1;
4062  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4063  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4064  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4065  }
4066 
4067  y.store( i , y.load(i ) - xmm1*factor );
4068  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4069  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4070  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4071  }
4072 
   // Chunks of three SIMD vectors.
4073  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
4074  {
4075  const size_t jbegin( ( IsUpper<MT1>::value )
4076  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4077  :( 0UL ) );
4078  const size_t jend( ( IsLower<MT1>::value )
4079  ?( min( i+SIMDSIZE*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4080  :( N ) );
4081  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4082 
4083  SIMDType xmm1, xmm2, xmm3;
4084 
4085  for( size_t j=jbegin; j<jend; ++j ) {
4086  const SIMDType x1( set( x[j] ) );
4087  xmm1 += A.load(i ,j) * x1;
4088  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4089  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4090  }
4091 
4092  y.store( i , y.load(i ) - xmm1*factor );
4093  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4094  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4095  }
4096 
   // Chunks of two SIMD vectors.
4097  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
4098  {
4099  const size_t jbegin( ( IsUpper<MT1>::value )
4100  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4101  :( 0UL ) );
4102  const size_t jend( ( IsLower<MT1>::value )
4103  ?( min( i+SIMDSIZE*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4104  :( N ) );
4105  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4106 
4107  SIMDType xmm1, xmm2;
4108 
4109  for( size_t j=jbegin; j<jend; ++j ) {
4110  const SIMDType x1( set( x[j] ) );
4111  xmm1 += A.load(i ,j) * x1;
4112  xmm2 += A.load(i+SIMDSIZE,j) * x1;
4113  }
4114 
4115  y.store( i , y.load(i ) - xmm1*factor );
4116  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2*factor );
4117  }
4118 
   // Single SIMD vectors.
4119  for( ; i<ipos; i+=SIMDSIZE )
4120  {
4121  const size_t jbegin( ( IsUpper<MT1>::value )
4122  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4123  :( 0UL ) );
4124  const size_t jend( ( IsLower<MT1>::value )
4125  ?( min( i+SIMDSIZE, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4126  :( N ) );
4127  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4128 
4129  SIMDType xmm1;
4130 
4131  for( size_t j=jbegin; j<jend; ++j ) {
4132  xmm1 += A.load(i,j) * set( x[j] );
4133  }
4134 
4135  y.store( i, y.load(i) - xmm1*factor );
4136  }
4137 
   // Scalar tail: element-wise processing of the rows in [ipos,M), only if a remainder exists.
4138  for( ; remainder && i<M; ++i )
4139  {
4140  const size_t jbegin( ( IsUpper<MT1>::value )
4141  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4142  :( 0UL ) );
4143  const size_t jend( ( IsLower<MT1>::value )
4144  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4145  :( N ) );
4146  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4147 
4148  ElementType value = ElementType();
4149 
4150  for( size_t j=jbegin; j<jend; ++j ) {
4151  value += A(i,j) * x[j];
4152  }
4153 
4154  y[i] -= value * scalar;
4155  }
4156  }
4157  //**********************************************************************************************
4158 
4159  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Non-vectorized large-matrix subtraction assignment kernel: simply forwards to
// selectDefaultSubAssignKernel().
// NOTE(review): listing line 4177 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
4173  template< typename VT1 // Type of the left-hand side target vector
4174  , typename MT1 // Type of the left-hand side matrix operand
4175  , typename VT2 // Type of the right-hand side vector operand
4176  , typename ST2 > // Type of the scalar value
4178  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4179  {
4180  selectDefaultSubAssignKernel( y, A, x, scalar );
4181  }
4182  //**********************************************************************************************
4183 
4184  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized large-matrix kernel for y -= A * x * scalar.
// The matrix is traversed in blocks of iblock rows by jblock columns. Within each block the
// rows are processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors: the products
// A.load(i,j) * set(x[j]) are accumulated over the block's columns [jj,jend), scaled by the
// factor and subtracted from the values loaded from y. Each column block therefore
// contributes a partial result to the same elements of y. A scalar loop handles leftover rows.
// NOTE(review): listing line 4203 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
// NOTE(review): the xmm accumulators are only default-constructed before being used with +=;
// this presumably relies on SIMDType default construction yielding zero — confirm.
4199  template< typename VT1 // Type of the left-hand side target vector
4200  , typename MT1 // Type of the left-hand side matrix operand
4201  , typename VT2 // Type of the right-hand side vector operand
4202  , typename ST2 > // Type of the scalar value
4204  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4205  {
   // A scalar tail loop is required unless both the matrix and the target vector are padded.
4206  constexpr bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4207 
4208  const size_t M( A.rows() );
4209  const size_t N( A.columns() );
4210 
   // Block sizes: the row block covers 32768 bytes worth of elements; the column block is
   // 8 columns wide if N is smaller than iblock and 4 columns wide otherwise.
4211  const size_t iblock( 32768UL / sizeof( ElementType ) );
4212  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4213 
4214  BLAZE_INTERNAL_ASSERT( ( iblock % SIMDSIZE ) == 0UL, "Invalid block size detected" );
4215 
   // SIMD value created from the scalar; applied when writing back to y.
4216  const SIMDType factor( set( scalar ) );
4217 
4218  for( size_t ii=0U; ii<M; ii+=iblock ) {
4219  for( size_t jj=0UL; jj<N; jj+=jblock )
4220  {
4221  const size_t jend( min( jj+jblock, N ) );
4222  const size_t itmp( min( ii+iblock, M ) );
   // For upper matrices the row range additionally ends at jend (jend-1 if strictly upper).
4223  const size_t iend( ( IsUpper<MT1>::value )
4224  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
4225  :( itmp ) );
4226 
   // End of the SIMD-processed rows of this block (see the assertion for the exact relation).
4227  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
4228  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
4229 
   // For lower matrices the first row is jj (jj+1 if strictly lower) masked with
   // size_t(-SIMDSIZE), but never before ii; otherwise the block starts at row ii.
4230  size_t i( ( IsLower<MT1>::value )
4231  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-SIMDSIZE) ) )
4232  :( ii ) );
4233 
   // Chunks of eight SIMD vectors.
4234  for( ; (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL )
4235  {
4236  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4237 
4238  for( size_t j=jj; j<jend; ++j ) {
4239  const SIMDType x1( set( x[j] ) );
4240  xmm1 += A.load(i ,j) * x1;
4241  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4242  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4243  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4244  xmm5 += A.load(i+SIMDSIZE*4UL,j) * x1;
4245  xmm6 += A.load(i+SIMDSIZE*5UL,j) * x1;
4246  xmm7 += A.load(i+SIMDSIZE*6UL,j) * x1;
4247  xmm8 += A.load(i+SIMDSIZE*7UL,j) * x1;
4248  }
4249 
4250  y.store( i , y.load(i ) - xmm1*factor );
4251  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4252  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4253  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4254  y.store( i+SIMDSIZE*4UL, y.load(i+SIMDSIZE*4UL) - xmm5*factor );
4255  y.store( i+SIMDSIZE*5UL, y.load(i+SIMDSIZE*5UL) - xmm6*factor );
4256  y.store( i+SIMDSIZE*6UL, y.load(i+SIMDSIZE*6UL) - xmm7*factor );
4257  y.store( i+SIMDSIZE*7UL, y.load(i+SIMDSIZE*7UL) - xmm8*factor );
4258  }
4259 
   // Chunks of four SIMD vectors.
4260  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
4261  {
4262  SIMDType xmm1, xmm2, xmm3, xmm4;
4263 
4264  for( size_t j=jj; j<jend; ++j ) {
4265  const SIMDType x1( set( x[j] ) );
4266  xmm1 += A.load(i ,j) * x1;
4267  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4268  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4269  xmm4 += A.load(i+SIMDSIZE*3UL,j) * x1;
4270  }
4271 
4272  y.store( i , y.load(i ) - xmm1*factor );
4273  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4274  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4275  y.store( i+SIMDSIZE*3UL, y.load(i+SIMDSIZE*3UL) - xmm4*factor );
4276  }
4277 
   // Chunks of three SIMD vectors.
4278  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
4279  {
4280  SIMDType xmm1, xmm2, xmm3;
4281 
4282  for( size_t j=jj; j<jend; ++j ) {
4283  const SIMDType x1( set( x[j] ) );
4284  xmm1 += A.load(i ,j) * x1;
4285  xmm2 += A.load(i+SIMDSIZE ,j) * x1;
4286  xmm3 += A.load(i+SIMDSIZE*2UL,j) * x1;
4287  }
4288 
4289  y.store( i , y.load(i ) - xmm1*factor );
4290  y.store( i+SIMDSIZE , y.load(i+SIMDSIZE ) - xmm2*factor );
4291  y.store( i+SIMDSIZE*2UL, y.load(i+SIMDSIZE*2UL) - xmm3*factor );
4292  }
4293 
   // Chunks of two SIMD vectors.
4294  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
4295  {
4296  SIMDType xmm1, xmm2;
4297 
4298  for( size_t j=jj; j<jend; ++j ) {
4299  const SIMDType x1( set( x[j] ) );
4300  xmm1 += A.load(i ,j) * x1;
4301  xmm2 += A.load(i+SIMDSIZE,j) * x1;
4302  }
4303 
4304  y.store( i , y.load(i ) - xmm1*factor );
4305  y.store( i+SIMDSIZE, y.load(i+SIMDSIZE) - xmm2*factor );
4306  }
4307 
   // Single SIMD vectors.
4308  for( ; i<ipos; i+=SIMDSIZE )
4309  {
4310  SIMDType xmm1;
4311 
4312  for( size_t j=jj; j<jend; ++j ) {
4313  xmm1 += A.load(i,j) * set( x[j] );
4314  }
4315 
4316  y.store( i, y.load(i) - xmm1*factor );
4317  }
4318 
   // Scalar tail for the rows in [ipos,iend), only if a remainder exists.
4319  for( ; remainder && i<iend; ++i )
4320  {
4321  ElementType value = ElementType();
4322 
4323  for( size_t j=jj; j<jend; ++j ) {
4324  value += A(i,j) * x[j];
4325  }
4326 
4327  y[i] -= value * scalar;
4328  }
4329  }
4330  }
4331  }
4332  //**********************************************************************************************
4333 
4334  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Default (non-BLAS) variant of the BLAS subtraction assignment kernel: forwards to
// selectLargeSubAssignKernel().
// NOTE(review): listing line 4352 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
4348  template< typename VT1 // Type of the left-hand side target vector
4349  , typename MT1 // Type of the left-hand side matrix operand
4350  , typename VT2 // Type of the right-hand side vector operand
4351  , typename ST2 > // Type of the scalar value
4353  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4354  {
4355  selectLargeSubAssignKernel( y, A, x, scalar );
4356  }
4357  //**********************************************************************************************
4358 
4359  //**BLAS-based subtraction assignment to dense vectors******************************************
// BLAS-based subtraction assignment kernel; only compiled when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION are enabled.
// Triangular matrices: a temporary holding scalar * x is created, trmv() is applied to it with
// the lower/upper flag chosen from IsLower<MT1>, and the temporary is subtracted from y.
// All other matrices: gemv() is called with ET(-scalar) and ET(1) as the two scalar arguments
// (presumably alpha and beta, so that the negated product is accumulated into y — confirm
// against the gemv wrapper).
// NOTE(review): listing line 4378 is missing between the template header and the function
// name; presumably it holds the return type / enabling condition — confirm.
4360 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
4361 
4374  template< typename VT1 // Type of the left-hand side target vector
4375  , typename MT1 // Type of the left-hand side matrix operand
4376  , typename VT2 // Type of the right-hand side vector operand
4377  , typename ST2 > // Type of the scalar value
4379  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4380  {
4381  using ET = ElementType_<VT1>;
4382 
4383  if( IsTriangular<MT1>::value ) {
4384  ResultType_<VT1> tmp( serial( scalar * x ) );
4385  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4386  subAssign( y, tmp );
4387  }
4388  else {
4389  gemv( y, A, x, ET(-scalar), ET(1) );
4390  }
4391  }
4392 #endif
4393  //**********************************************************************************************
4394 
4395  //**Subtraction assignment to sparse vectors****************************************************
4396  // No special implementation for the subtraction assignment to sparse vectors.
4397  //**********************************************************************************************
4398 
4399  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a scaled expression (rhs, a DVecScalarMultExpr) to a dense
// column vector (lhs). The expression is not fused into the target: it is first evaluated
// into a temporary, which is then handed to the multAssign() overload for that temporary.
4411  template< typename VT1 > // Type of the target dense vector
4412  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4413  {
4415 
4419 
// Both sides must already have the same size; this is only checked by an internal assertion.
4420  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4421 
// serial() forces the serial evaluation of rhs into the temporary.
4422  const ResultType tmp( serial( rhs ) );
4423  multAssign( ~lhs, tmp );
4424  }
4425  //**********************************************************************************************
4426 
4427  //**Multiplication assignment to sparse vectors*************************************************
4428  // No special implementation for the multiplication assignment to sparse vectors.
4429  //**********************************************************************************************
4430 
4431  //**Division assignment to dense vectors********************************************************
// Division assignment of a scaled expression (rhs, a DVecScalarMultExpr) to a dense
// column vector (lhs). The expression is first evaluated into a temporary, which is then
// handed to the divAssign() overload for that temporary.
4443  template< typename VT1 > // Type of the target dense vector
4444  friend inline void divAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4445  {
4447 
4451 
// Both sides must already have the same size; this is only checked by an internal assertion.
4452  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4453 
// serial() forces the serial evaluation of rhs into the temporary.
4454  const ResultType tmp( serial( rhs ) );
4455  divAssign( ~lhs, tmp );
4456  }
4457  //**********************************************************************************************
4458 
4459  //**Division assignment to sparse vectors*******************************************************
4460  // No special implementation for the division assignment to sparse vectors.
4461  //**********************************************************************************************
4462 
4463  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of the scaled matrix/vector product to a dense vector. The overload is
// constrained via EnableIf_< UseSMPAssign<VT1> >. It evaluates both operands of the inner
// product and forwards the rebuilt expression A * x * scalar to smpAssign().
4477  template< typename VT1 > // Type of the target dense vector
4478  friend inline EnableIf_< UseSMPAssign<VT1> >
4480  {
4482 
4483  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4484 
// Operands of the matrix/vector multiplication wrapped by the scaling expression.
4485  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4486  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4487 
// A matrix without rows leaves nothing to assign; a matrix without columns only
// requires the target to be reset.
4488  if( left.rows() == 0UL ) {
4489  return;
4490  }
4491  else if( left.columns() == 0UL ) {
4492  reset( ~lhs );
4493  return;
4494  }
4495 
4496  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4497  RT x( right ); // Evaluation of the right-hand side dense vector operand
4498 
// Sanity checks: the evaluated operands must keep the dimensions of the originals
// and must match the size of the target vector.
4499  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4500  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4501  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4502  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4503 
4504  smpAssign( ~lhs, A * x * rhs.scalar_ );
4505  }
4506  //**********************************************************************************************
4507 
4508  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the scaled expression to a sparse vector. The overload is constrained
// via EnableIf_< UseSMPAssign<VT1> >. The expression is evaluated into a temporary of type
// ResultType, which is then passed to smpAssign().
4522  template< typename VT1 > // Type of the target sparse vector
4523  friend inline EnableIf_< UseSMPAssign<VT1> >
4525  {
4527 
4531 
// Both sides must already have the same size; this is only checked by an internal assertion.
4532  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4533 
4534  const ResultType tmp( rhs );
4535  smpAssign( ~lhs, tmp );
4536  }
4537  //**********************************************************************************************
4538 
4539  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the scaled matrix/vector product to a dense vector. The
// overload is constrained via EnableIf_< UseSMPAssign<VT1> >. It evaluates both operands of
// the inner product and forwards the rebuilt expression A * x * scalar to smpAddAssign().
4553  template< typename VT1 > // Type of the target dense vector
4554  friend inline EnableIf_< UseSMPAssign<VT1> >
4556  {
4558 
4559  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4560 
// Operands of the matrix/vector multiplication wrapped by the scaling expression.
4561  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4562  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4563 
// An empty matrix (no rows or no columns) returns early and leaves the target untouched.
4564  if( left.rows() == 0UL || left.columns() == 0UL ) {
4565  return;
4566  }
4567 
4568  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4569  RT x( right ); // Evaluation of the right-hand side dense vector operand
4570 
// Sanity checks: the evaluated operands must keep the dimensions of the originals
// and must match the size of the target vector.
4571  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4572  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4573  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4574  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4575 
4576  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
4577  }
4578  //**********************************************************************************************
4579 
4580  //**SMP addition assignment to sparse vectors***************************************************
4581  // No special implementation for the SMP addition assignment to sparse vectors.
4582  //**********************************************************************************************
4583 
4584  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the scaled matrix/vector product to a dense vector. The
// overload is constrained via EnableIf_< UseSMPAssign<VT1> >. It evaluates both operands of
// the inner product and forwards the rebuilt expression A * x * scalar to smpSubAssign().
4598  template< typename VT1 > // Type of the target dense vector
4599  friend inline EnableIf_< UseSMPAssign<VT1> >
4601  {
4603 
4604  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4605 
// Operands of the matrix/vector multiplication wrapped by the scaling expression.
4606  LeftOperand_<MVM> left ( rhs.vector_.leftOperand() );
4607  RightOperand_<MVM> right( rhs.vector_.rightOperand() );
4608 
// An empty matrix (no rows or no columns) returns early and leaves the target untouched.
4609  if( left.rows() == 0UL || left.columns() == 0UL ) {
4610  return;
4611  }
4612 
4613  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4614  RT x( right ); // Evaluation of the right-hand side dense vector operand
4615 
// Sanity checks: the evaluated operands must keep the dimensions of the originals
// and must match the size of the target vector.
4616  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4617  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4618  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4619  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4620 
4621  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
4622  }
4623  //**********************************************************************************************
4624 
4625  //**SMP subtraction assignment to sparse vectors************************************************
4626  // No special implementation for the SMP subtraction assignment to sparse vectors.
4627  //**********************************************************************************************
4628 
4629  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the scaled expression to a dense vector. The overload is
// constrained via EnableIf_< UseSMPAssign<VT1> >. The expression is evaluated into a
// temporary of type ResultType, which is then passed to smpMultAssign().
4644  template< typename VT1 > // Type of the target dense vector
4645  friend inline EnableIf_< UseSMPAssign<VT1> >
4647  {
4649 
4653 
// Both sides must already have the same size; this is only checked by an internal assertion.
4654  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4655 
4656  const ResultType tmp( rhs );
4657  smpMultAssign( ~lhs, tmp );
4658  }
4659  //**********************************************************************************************
4660 
4661  //**SMP multiplication assignment to sparse vectors*********************************************
4662  // No special implementation for the SMP multiplication assignment to sparse vectors.
4663  //**********************************************************************************************
4664 
4665  //**SMP division assignment to dense vectors****************************************************
// SMP division assignment of the scaled expression to a dense vector. The overload is
// constrained via EnableIf_< UseSMPAssign<VT1> >. The expression is evaluated into a
// temporary of type ResultType, which is then passed to smpDivAssign().
4679  template< typename VT1 > // Type of the target dense vector
4680  friend inline EnableIf_< UseSMPAssign<VT1> >
4682  {
4684 
4688 
// Both sides must already have the same size; this is only checked by an internal assertion.
4689  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4690 
4691  const ResultType tmp( rhs );
4692  smpDivAssign( ~lhs, tmp );
4693  }
4694  //**********************************************************************************************
4695 
4696  //**SMP division assignment to sparse vectors***************************************************
4697  // No special implementation for the SMP division assignment to sparse vectors.
4698  //**********************************************************************************************
4699 
4700  //**Compile time checks*************************************************************************
4709  //**********************************************************************************************
4710 };
4712 //*************************************************************************************************
4713 
4714 
4715 
4716 
4717 //=================================================================================================
4718 //
4719 // GLOBAL BINARY ARITHMETIC OPERATORS
4720 //
4721 //=================================================================================================
4722 
4723 //*************************************************************************************************
// Multiplication operator for a column-major dense matrix (mat) and a dense column vector (vec).
// Returns a const TDMatDVecMultExpr<MT,VT> expression object built from the two operands.
// Emits a std::invalid_argument exception (via BLAZE_THROW_INVALID_ARGUMENT) when the number
// of matrix columns differs from the vector size.
4754 template< typename MT // Type of the left-hand side dense matrix
4755  , typename VT > // Type of the right-hand side dense vector
4756 inline decltype(auto)
4757  operator*( const DenseMatrix<MT,true>& mat, const DenseVector<VT,false>& vec )
4758 {
4760 
4762 
// The sizes are validated before the expression object is created.
4763  if( (~mat).columns() != (~vec).size() ) {
4764  BLAZE_THROW_INVALID_ARGUMENT( "Matrix and vector sizes do not match" );
4765  }
4766 
4767  using ReturnType = const TDMatDVecMultExpr<MT,VT>;
4768  return ReturnType( ~mat, ~vec );
4769 }
4770 //*************************************************************************************************
4771 
4772 
4773 
4774 
4775 //=================================================================================================
4776 //
4777 // SIZE SPECIALIZATIONS
4778 //
4779 //=================================================================================================
4780 
4781 //*************************************************************************************************
// Size trait specialization for TDMatDVecMultExpr: by deriving from Rows<MT>, the compile
// time size of the expression is taken from the number of rows of the matrix type MT.
4783 template< typename MT, typename VT >
4784 struct Size< TDMatDVecMultExpr<MT,VT> >
4785  : public Rows<MT>
4786 {};
4788 //*************************************************************************************************
4789 
4790 
4791 
4792 
4793 //=================================================================================================
4794 //
4795 // ISALIGNED SPECIALIZATIONS
4796 //
4797 //=================================================================================================
4798 
4799 //*************************************************************************************************
// IsAligned trait specialization for TDMatDVecMultExpr: the expression is reported as
// aligned exactly when both IsAligned<MT> and IsAligned<VT> hold (combined through And).
4801 template< typename MT, typename VT >
4802 struct IsAligned< TDMatDVecMultExpr<MT,VT> >
4803  : public BoolConstant< And< IsAligned<MT>, IsAligned<VT> >::value >
4804 {};
4806 //*************************************************************************************************
4807 
4808 } // namespace blaze
4809 
4810 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Data type constraint.
IfTrue_< evaluateMatrix, const MRT, MCT > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:219
Header file for the generic min algorithm.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:72
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:209
Compile time check for low-level access to constant data.This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDMatDVecMultExpr.h:294
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
ElementType_< VRT > VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:131
Subvector< VT, AF > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:322
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a column dense or sparse vector type...
Definition: ColumnVector.h:61
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Generic wrapper for a compile time constant integral value.The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:198
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:560
ResultType_< MT > MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:128
CompositeType_< VT > VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:133
EnableIf_< IsDenseVector< VT1 > > smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:193
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template.The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:224
size_t size() const noexcept
Returns the current size/dimension of the vector.
Definition: DVecScalarMultExpr.h:521
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:250
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:140
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the VecScalarMultExpr base class.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1809
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:78
Expression object for transpose dense matrix-dense vector multiplications.The TDMatDVecMultExpr class...
Definition: Forward.h:149
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
MultTrait_< MRT, VRT > ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:205
Constraint on the transpose flag of vector types.
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Header file for the generic max algorithm.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:371
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:361
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:210
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:262
Header file for the HasSIMDAdd type trait.
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:339
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:317
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:340
CompositeType_< MT > MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:132
#define BLAZE_CONSTRAINT_MUST_NOT_BE_MATMATMULTEXPR_TYPE(T)
Constraint on the data type.In case the given data type T is a matrix/matrix multiplication expressio...
Definition: MatMatMultExpr.h:88
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:80
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:109
ResultType_< VT > VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:129
Constraint on the data type.
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatDVecMultExpr.h:208
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Constraint on the data type.
LeftOperand vector_
Left-hand side dense vector of the multiplication expression.
Definition: DVecScalarMultExpr.h:592
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions.The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DVecScalarMultExpr.h:593
If_< IsExpression< MT >, const MT, const MT &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:213
System settings for the BLAS mode.
Base class for all vector/scalar multiplication expression templates.The VecScalarMultExpr class serv...
Definition: VecScalarMultExpr.h:67
ElementType_< ResultType > ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:207
Header file for the IsSIMDCombinable type trait.
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
IfTrue_< evaluateVector, const VRT, VCT > RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:222
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:154
size_t size() const noexcept
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:307
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
EnableIf_< IsDenseVector< VT1 > > smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:222
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:206
Compile time check for built-in data types.This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
ElementType_< MRT > MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:130
Constraints on the storage order of matrix types.
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:819
#define BLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION
Compilation switch for the BLAS matrix/vector multiplication kernels (gemv).This compilation switch e...
Definition: BLAS.h:93
Header file for the HasMutableDataAccess type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemv).This compilation switch e...
Definition: BLAS.h:152
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template.The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
Header file for BLAS general matrix/vector multiplication functions (gemv)
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3082
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:61
Compile time check for complex types.This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:64
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Expression object for dense vector-scalar multiplications.The DVecScalarMultExpr class represents the...
Definition: DVecScalarMultExpr.h:106
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:66
Compile time evaluation of the size of a vector.The Size type trait evaluates the size of the given v...
Definition: Size.h:74
RightOperand rightOperand() const noexcept
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:327
Base class for sparse vectors.The SparseVector class is a base class for all arbitrarily sized (N-dim...
Definition: Forward.h:130
Header file for the IsComplexFloat type trait.
Header file for the IntegralConstant class template.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:351
Compile time evaluation of the number of rows of a matrix.The Rows type trait evaluates the number of...
Definition: Rows.h:75
Header file for the IsComplex type trait.
Constraint on the data type.
Header file for the complex data type.
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions.The RightOperand_ alias declaration provid...
Definition: Aliases.h:383
If_< IsExpression< VT >, const VT, const VT &> RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:216
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions.The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:383
Header file for the MatVecMultExpr base class.
Constraint on the data type.
TDMatDVecMultExpr(const MT &mat, const VT &vec) noexcept
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:248
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:384
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the function trace functionality.