TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level2.h>
55 #include <blaze/math/Functions.h>
56 #include <blaze/math/Intrinsics.h>
57 #include <blaze/math/shims/Reset.h>
78 #include <blaze/system/BLAS.h>
80 #include <blaze/util/Assert.h>
81 #include <blaze/util/Complex.h>
84 #include <blaze/util/DisableIf.h>
85 #include <blaze/util/EnableIf.h>
87 #include <blaze/util/SelectType.h>
88 #include <blaze/util/Types.h>
94 
95 
96 namespace blaze {
97 
98 //=================================================================================================
99 //
100 // CLASS TDMATDVECMULTEXPR
101 //
102 //=================================================================================================
103 
104 //*************************************************************************************************
111 template< typename MT // Type of the left-hand side dense matrix
112  , typename VT > // Type of the right-hand side dense vector
113 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
114  , private MatVecMultExpr
115  , private Computation
116 {
117  private:
118  //**Type definitions****************************************************************************
119  typedef typename MT::ResultType MRT;
120  typedef typename VT::ResultType VRT;
121  typedef typename MRT::ElementType MET;
122  typedef typename VRT::ElementType VET;
123  typedef typename MT::CompositeType MCT;
124  typedef typename VT::CompositeType VCT;
125  //**********************************************************************************************
126 
127  //**********************************************************************************************
129  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
131  //**********************************************************************************************
132 
133  //**********************************************************************************************
135  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
144  template< typename T1 >
145  struct UseSMPAssign {
146  enum { value = ( evaluateMatrix || evaluateVector ) };
147  };
149  //**********************************************************************************************
150 
151  //**********************************************************************************************
153 
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = BLAZE_BLAS_MODE &&
164  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
178  template< typename T1, typename T2, typename T3 >
179  struct UseDoublePrecisionKernel {
180  enum { value = BLAZE_BLAS_MODE &&
185  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
199  template< typename T1, typename T2, typename T3 >
200  struct UseSinglePrecisionComplexKernel {
201  typedef complex<float> Type;
202  enum { value = BLAZE_BLAS_MODE &&
207  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
211  };
213  //**********************************************************************************************
214 
215  //**********************************************************************************************
217 
221  template< typename T1, typename T2, typename T3 >
222  struct UseDoublePrecisionComplexKernel {
223  typedef complex<double> Type;
224  enum { value = BLAZE_BLAS_MODE &&
229  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
233  };
235  //**********************************************************************************************
236 
237  //**********************************************************************************************
239 
242  template< typename T1, typename T2, typename T3 >
243  struct UseDefaultKernel {
244  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
245  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
246  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
247  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
248  };
250  //**********************************************************************************************
251 
252  //**********************************************************************************************
254 
258  template< typename T1, typename T2, typename T3 >
259  struct UseVectorizedDefaultKernel {
260  enum { value = !IsDiagonal<T2>::value &&
261  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
266  };
268  //**********************************************************************************************
269 
270  public:
271  //**Type definitions****************************************************************************
277  typedef const ElementType ReturnType;
278  typedef const ResultType CompositeType;
279 
281  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
282 
284  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
285 
288 
291  //**********************************************************************************************
292 
293  //**Compilation flags***************************************************************************
295  enum { vectorizable = !IsDiagonal<MT>::value &&
296  MT::vectorizable && VT::vectorizable &&
300 
302  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
303  !evaluateVector && VT::smpAssignable };
304  //**********************************************************************************************
305 
306  //**Constructor*********************************************************************************
312  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
313  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
314  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
315  {
316  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
317  }
318  //**********************************************************************************************
319 
320  //**Subscript operator**************************************************************************
326  inline ReturnType operator[]( size_t index ) const {
327  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
328 
329  if( ( IsStrictlyLower<MT>::value && index == 0UL ) ||
330  ( IsStrictlyUpper<MT>::value && index == mat_.rows()-1UL ) ||
331  mat_.columns() == 0UL )
332  return ElementType();
333 
335  return mat_(index,index) * vec_[index];
336 
337  const size_t jbegin( ( IsUpper<MT>::value )
338  ?( IsStrictlyUpper<MT>::value ? index+1UL : index )
339  :( 0UL ) );
340  const size_t jend( ( IsLower<MT>::value )
341  ?( IsStrictlyLower<MT>::value ? index : index+1UL )
342  :( mat_.columns() ) );
343  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
344 
345  const size_t jnum( jend - jbegin );
346  const size_t jpos( jbegin + ( ( jnum - 1UL ) & size_t(-2) ) + 1UL );
347 
348  ElementType res( mat_(index,jbegin) * vec_[jbegin] );
349 
350  for( size_t j=jbegin+1UL; j<jpos; j+=2UL ) {
351  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
352  }
353  if( jpos < jend ) {
354  res += mat_(index,jpos) * vec_[jpos];
355  }
356 
357  return res;
358  }
359  //**********************************************************************************************
360 
361  //**Size function*******************************************************************************
366  inline size_t size() const {
367  return mat_.rows();
368  }
369  //**********************************************************************************************
370 
371  //**Left operand access*************************************************************************
376  inline LeftOperand leftOperand() const {
377  return mat_;
378  }
379  //**********************************************************************************************
380 
381  //**Right operand access************************************************************************
386  inline RightOperand rightOperand() const {
387  return vec_;
388  }
389  //**********************************************************************************************
390 
391  //**********************************************************************************************
397  template< typename T >
398  inline bool canAlias( const T* alias ) const {
399  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
400  }
401  //**********************************************************************************************
402 
403  //**********************************************************************************************
409  template< typename T >
410  inline bool isAliased( const T* alias ) const {
411  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
412  }
413  //**********************************************************************************************
414 
415  //**********************************************************************************************
420  inline bool isAligned() const {
421  return mat_.isAligned() && vec_.isAligned();
422  }
423  //**********************************************************************************************
424 
425  //**********************************************************************************************
430  inline bool canSMPAssign() const {
431  return ( !BLAZE_BLAS_IS_PARALLEL ||
432  ( IsComputation<MT>::value && !evaluateMatrix ) ||
433  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
435  }
436  //**********************************************************************************************
437 
438  private:
439  //**Member variables****************************************************************************
440  LeftOperand mat_; //!< Left-hand side dense matrix of the multiplication expression.
441  RightOperand vec_; //!< Right-hand side dense vector of the multiplication expression.
442  //**********************************************************************************************
443 
444  //**Assignment to dense vectors*****************************************************************
457  template< typename VT1 > // Type of the target dense vector
458  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
459  {
461 
462  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
463 
464  if( rhs.mat_.rows() == 0UL ) {
465  return;
466  }
467  else if( rhs.mat_.columns() == 0UL ) {
468  reset( ~lhs );
469  return;
470  }
471 
472  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
473  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
474 
475  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
476  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
477  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
478  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
479 
480  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
481  }
483  //**********************************************************************************************
484 
485  //**Assignment to dense vectors (kernel selection)**********************************************
496  template< typename VT1 // Type of the left-hand side target vector
497  , typename MT1 // Type of the left-hand side matrix operand
498  , typename VT2 > // Type of the right-hand side vector operand
499  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
500  {
501  if( ( IsDiagonal<MT1>::value ) ||
502  ( IsComputation<MT>::value && !evaluateMatrix ) ||
503  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
504  selectSmallAssignKernel( y, A, x );
505  else
506  selectBlasAssignKernel( y, A, x );
507  }
509  //**********************************************************************************************
510 
511  //**Default assignment to dense vectors*********************************************************
525  template< typename VT1 // Type of the left-hand side target vector
526  , typename MT1 // Type of the left-hand side matrix operand
527  , typename VT2 > // Type of the right-hand side vector operand
528  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
529  {
530  const size_t M( A.rows() );
531  const size_t N( A.columns() );
532 
533  if( IsStrictlyLower<MT1>::value ) {
534  reset( y[0] );
535  }
536 
537  if( !IsUpper<MT1>::value )
538  {
539  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
540  y[i] = A(i,0UL) * x[0UL];
541  }
542  }
543 
544  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
545  {
546  if( IsDiagonal<MT1>::value )
547  {
548  y[j] = A(j,j) * x[j];
549  }
550  else
551  {
552  const size_t ibegin( ( IsLower<MT1>::value )
553  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
554  :( 0UL ) );
555  const size_t iend( ( IsUpper<MT1>::value )
556  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
557  :( M ) );
558  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
559 
560  const size_t inum( iend - ibegin );
561  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
562 
563  for( size_t i=ibegin; i<ipos; i+=2UL ) {
564  y[i ] += A(i ,j) * x[j];
565  y[i+1UL] += A(i+1UL,j) * x[j];
566  }
567  if( ipos < iend ) {
568  y[ipos] += A(ipos,j) * x[j];
569  }
570  if( IsUpper<MT1>::value ) {
571  y[iend] = A(iend,j) * x[j];
572  }
573  }
574  }
575 
576  if( IsStrictlyUpper<MT1>::value ) {
577  reset( y[M-1UL] );
578  }
579  }
581  //**********************************************************************************************
582 
583  //**Default assignment to dense vectors (small matrices)****************************************
597  template< typename VT1 // Type of the left-hand side target vector
598  , typename MT1 // Type of the left-hand side matrix operand
599  , typename VT2 > // Type of the right-hand side vector operand
600  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
601  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
602  {
603  selectDefaultAssignKernel( y, A, x );
604  }
606  //**********************************************************************************************
607 
608  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized small-matrix assignment kernel (enabled when UseVectorizedDefaultKernel applies).
// Computes y = A * x by walking over the rows of A in chunks of 8, 4, 3, 2 and finally 1 SIMD
// vectors (IT::size elements each). For every chunk the products A.load(i,j) * set(x[j]) are
// accumulated over the relevant columns j and the accumulators are stored to y.
//   y: target vector, A: matrix operand, x: vector operand.
// NOTE(review): the xmm accumulators are never explicitly initialized; this presumably relies
// on IntrinsicType's default constructor producing zero -- confirm.
622  template< typename VT1 // Type of the left-hand side target vector
623  , typename MT1 // Type of the left-hand side matrix operand
624  , typename VT2 > // Type of the right-hand side vector operand
625  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
626  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
627  {
628  typedef IntrinsicTrait<ElementType> IT;
629 
630  const size_t M( A.rows() );
631  const size_t N( A.columns() );
632 
633  size_t i( 0UL );
634 
// Chunks of eight SIMD vectors. The column range [jbegin,jend) is narrowed for upper and
// lower matrix types so that only columns intersecting the current row chunk are visited.
635  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
636  {
637  const size_t jbegin( ( IsUpper<MT1>::value )
638  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
639  :( 0UL ) );
640  const size_t jend( ( IsLower<MT1>::value )
641  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
642  :( N ) );
643  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
644 
645  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
646 
647  for( size_t j=jbegin; j<jend; ++j ) {
648  const IntrinsicType x1( set( x[j] ) );
649  xmm1 = xmm1 + A.load(i ,j) * x1;
650  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
651  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
652  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
653  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
654  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
655  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
656  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
657  }
658 
659  y.store( i , xmm1 );
660  y.store( i+IT::size , xmm2 );
661  y.store( i+IT::size*2UL, xmm3 );
662  y.store( i+IT::size*3UL, xmm4 );
663  y.store( i+IT::size*4UL, xmm5 );
664  y.store( i+IT::size*5UL, xmm6 );
665  y.store( i+IT::size*6UL, xmm7 );
666  y.store( i+IT::size*7UL, xmm8 );
667  }
668 
// Chunks of four SIMD vectors (same scheme as above)
669  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
670  {
671  const size_t jbegin( ( IsUpper<MT1>::value )
672  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
673  :( 0UL ) );
674  const size_t jend( ( IsLower<MT1>::value )
675  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
676  :( N ) );
677  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
678 
679  IntrinsicType xmm1, xmm2, xmm3, xmm4;
680 
681  for( size_t j=jbegin; j<jend; ++j ) {
682  const IntrinsicType x1( set( x[j] ) );
683  xmm1 = xmm1 + A.load(i ,j) * x1;
684  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
685  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
686  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
687  }
688 
689  y.store( i , xmm1 );
690  y.store( i+IT::size , xmm2 );
691  y.store( i+IT::size*2UL, xmm3 );
692  y.store( i+IT::size*3UL, xmm4 );
693  }
694 
// Chunks of three SIMD vectors
695  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
696  {
697  const size_t jbegin( ( IsUpper<MT1>::value )
698  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
699  :( 0UL ) );
700  const size_t jend( ( IsLower<MT1>::value )
701  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
702  :( N ) );
703  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
704 
705  IntrinsicType xmm1, xmm2, xmm3;
706 
707  for( size_t j=jbegin; j<jend; ++j ) {
708  const IntrinsicType x1( set( x[j] ) );
709  xmm1 = xmm1 + A.load(i ,j) * x1;
710  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
711  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
712  }
713 
714  y.store( i , xmm1 );
715  y.store( i+IT::size , xmm2 );
716  y.store( i+IT::size*2UL, xmm3 );
717  }
718 
// Chunks of two SIMD vectors
719  for( ; (i+IT::size) < M; i+=IT::size*2UL )
720  {
721  const size_t jbegin( ( IsUpper<MT1>::value )
722  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
723  :( 0UL ) );
724  const size_t jend( ( IsLower<MT1>::value )
725  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
726  :( N ) );
727  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
728 
729  IntrinsicType xmm1, xmm2;
730 
731  for( size_t j=jbegin; j<jend; ++j ) {
732  const IntrinsicType x1( set( x[j] ) );
733  xmm1 = xmm1 + A.load(i ,j) * x1;
734  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
735  }
736 
737  y.store( i , xmm1 );
738  y.store( i+IT::size, xmm2 );
739  }
740 
// Remaining rows: a single SIMD vector.
// NOTE(review): a full vector is loaded and stored even if fewer than IT::size rows remain;
// presumably the operands provide padded storage for this -- confirm.
741  if( i < M )
742  {
743  const size_t jbegin( ( IsUpper<MT1>::value )
744  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
745  :( 0UL ) );
746  const size_t jend( ( IsLower<MT1>::value )
747  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
748  :( N ) );
749  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
750 
751  IntrinsicType xmm1;
752 
753  for( size_t j=jbegin; j<jend; ++j ) {
754  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
755  }
756 
757  y.store( i, xmm1 );
758  }
759  }
761  //**********************************************************************************************
762 
763  //**Default assignment to dense vectors (large matrices)****************************************
777  template< typename VT1 // Type of the left-hand side target vector
778  , typename MT1 // Type of the left-hand side matrix operand
779  , typename VT2 > // Type of the right-hand side vector operand
780  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
781  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
782  {
783  selectDefaultAssignKernel( y, A, x );
784  }
786  //**********************************************************************************************
787 
788  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized large-matrix assignment kernel (enabled when UseVectorizedDefaultKernel applies).
// Computes y = A * x block-wise: y is reset first, then the matrix is traversed in blocks of
// 'iblock' rows by 'jblock' columns and the partial products of each block are added to the
// values already stored in y (y.load + accumulator, then y.store).
//   y: target vector, A: matrix operand, x: vector operand.
// NOTE(review): the xmm accumulators are never explicitly initialized; this presumably relies
// on IntrinsicType's default constructor producing zero -- confirm.
802  template< typename VT1 // Type of the left-hand side target vector
803  , typename MT1 // Type of the left-hand side matrix operand
804  , typename VT2 > // Type of the right-hand side vector operand
805  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
806  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
807  {
808  typedef IntrinsicTrait<ElementType> IT;
809 
810  const size_t M( A.rows() );
811  const size_t N( A.columns() );
812 
// Row block size: 32768 bytes worth of elements; column block size: 8 if the matrix has
// fewer columns than 'iblock', otherwise 4.
813  const size_t iblock( 32768UL / sizeof( ElementType ) );
814  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
815 
816  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
817 
// All blocks accumulate into y, hence it has to start from its reset state
818  reset( y );
819 
820  for( size_t ii=0U; ii<M; ii+=iblock ) {
821  for( size_t jj=0UL; jj<N; jj+=jblock )
822  {
// Clamp the block to the matrix bounds; for upper/lower matrix types the row range is
// additionally limited to rows that can intersect the current column block. The lower
// start index is rounded down to a multiple of IT::size.
823  const size_t jend( min( jj+jblock, N ) );
824  const size_t itmp( min( ii+iblock, M ) );
825  const size_t iend( ( IsUpper<MT1>::value )
826  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
827  :( itmp ) );
828 
829  size_t i( ( IsLower<MT1>::value )
830  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
831  :( ii ) );
832 
// Rows of the block in chunks of eight SIMD vectors
833  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
834  {
835  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
836 
837  for( size_t j=jj; j<jend; ++j ) {
838  const IntrinsicType x1( set( x[j] ) );
839  xmm1 = xmm1 + A.load(i ,j) * x1;
840  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
841  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
842  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
843  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
844  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
845  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
846  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
847  }
848 
849  y.store( i , y.load(i ) + xmm1 );
850  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
851  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
852  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
853  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5 );
854  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6 );
855  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7 );
856  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8 );
857  }
858 
// Chunks of four SIMD vectors
859  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
860  {
861  IntrinsicType xmm1, xmm2, xmm3, xmm4;
862 
863  for( size_t j=jj; j<jend; ++j ) {
864  const IntrinsicType x1( set( x[j] ) );
865  xmm1 = xmm1 + A.load(i ,j) * x1;
866  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
867  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
868  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
869  }
870 
871  y.store( i , y.load(i ) + xmm1 );
872  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
873  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
874  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
875  }
876 
// Chunks of three SIMD vectors
877  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
878  {
879  IntrinsicType xmm1, xmm2, xmm3;
880 
881  for( size_t j=jj; j<jend; ++j ) {
882  const IntrinsicType x1( set( x[j] ) );
883  xmm1 = xmm1 + A.load(i ,j) * x1;
884  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
885  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
886  }
887 
888  y.store( i , y.load(i ) + xmm1 );
889  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
890  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
891  }
892 
// Chunks of two SIMD vectors
893  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
894  {
895  IntrinsicType xmm1, xmm2;
896 
897  for( size_t j=jj; j<jend; ++j ) {
898  const IntrinsicType x1( set( x[j] ) );
899  xmm1 = xmm1 + A.load(i ,j) * x1;
900  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
901  }
902 
903  y.store( i , y.load(i ) + xmm1 );
904  y.store( i+IT::size, y.load(i+IT::size) + xmm2 );
905  }
906 
// Remaining rows of the block: a single SIMD vector.
// NOTE(review): a full vector is loaded and stored even if fewer than IT::size rows remain;
// presumably the operands provide padded storage for this -- confirm.
907  if( i < iend )
908  {
909  IntrinsicType xmm1;
910 
911  for( size_t j=jj; j<jend; ++j ) {
912  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
913  }
914 
915  y.store( i, y.load(i) + xmm1 );
916  }
917  }
918  }
919  }
921  //**********************************************************************************************
922 
923  //**BLAS-based assignment to dense vectors (default)********************************************
937  template< typename VT1 // Type of the left-hand side target vector
938  , typename MT1 // Type of the left-hand side matrix operand
939  , typename VT2 > // Type of the right-hand side vector operand
940  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
941  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
942  {
943  selectLargeAssignKernel( y, A, x );
944  }
946  //**********************************************************************************************
947 
948  //**BLAS-based assignment to dense vectors (single precision)***********************************
949 #if BLAZE_BLAS_MODE
950 
963  template< typename VT1 // Type of the left-hand side target vector
964  , typename MT1 // Type of the left-hand side matrix operand
965  , typename VT2 > // Type of the right-hand side vector operand
966  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
967  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
968  {
969  if( IsTriangular<MT1>::value ) {
970  assign( y, x );
971  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
972  }
973  else {
974  sgemv( y, A, x, 1.0F, 0.0F );
975  }
976  }
978 #endif
979  //**********************************************************************************************
980 
981  //**BLAS-based assignment to dense vectors (double precision)***********************************
982 #if BLAZE_BLAS_MODE
983 
996  template< typename VT1 // Type of the left-hand side target vector
997  , typename MT1 // Type of the left-hand side matrix operand
998  , typename VT2 > // Type of the right-hand side vector operand
999  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1000  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1001  {
1002  if( IsTriangular<MT1>::value ) {
1003  assign( y, x );
1004  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1005  }
1006  else {
1007  dgemv( y, A, x, 1.0, 0.0 );
1008  }
1009  }
1011 #endif
1012  //**********************************************************************************************
1013 
1014  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1015 #if BLAZE_BLAS_MODE
1016 
1029  template< typename VT1 // Type of the left-hand side target vector
1030  , typename MT1 // Type of the left-hand side matrix operand
1031  , typename VT2 > // Type of the right-hand side vector operand
1032  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1033  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1034  {
1035  if( IsTriangular<MT1>::value ) {
1036  assign( y, x );
1037  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1038  }
1039  else {
1040  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1041  }
1042  }
1044 #endif
1045  //**********************************************************************************************
1046 
1047  //**BLAS-based assignment to dense vectors (double precision complex)***************************
1048 #if BLAZE_BLAS_MODE
1049 
1062  template< typename VT1 // Type of the left-hand side target vector
1063  , typename MT1 // Type of the left-hand side matrix operand
1064  , typename VT2 > // Type of the right-hand side vector operand
1065  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1066  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1067  {
1068  if( IsTriangular<MT1>::value ) {
1069  assign( y, x );
1070  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1071  }
1072  else {
1073  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1074  }
1075  }
1077 #endif
1078  //**********************************************************************************************
1079 
1080  //**Assignment to sparse vectors****************************************************************
1093  template< typename VT1 > // Type of the target sparse vector
1094  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1095  {
1097 
1101 
1102  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1103 
1104  const ResultType tmp( serial( rhs ) );
1105  assign( ~lhs, tmp );
1106  }
1108  //**********************************************************************************************
1109 
1110  //**Addition assignment to dense vectors********************************************************
1123  template< typename VT1 > // Type of the target dense vector
1124  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1125  {
1127 
1128  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1129 
1130  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1131  return;
1132  }
1133 
1134  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1135  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1136 
1137  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1138  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1139  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1140  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1141 
1142  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1143  }
1145  //**********************************************************************************************
1146 
1147  //**Addition assignment to dense vectors (kernel selection)*************************************
1158  template< typename VT1 // Type of the left-hand side target vector
1159  , typename MT1 // Type of the left-hand side matrix operand
1160  , typename VT2 > // Type of the right-hand side vector operand
1161  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1162  {
1163  if( ( IsDiagonal<MT1>::value ) ||
1164  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1165  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1166  selectSmallAddAssignKernel( y, A, x );
1167  else
1168  selectBlasAddAssignKernel( y, A, x );
1169  }
1171  //**********************************************************************************************
1172 
1173  //**Default addition assignment to dense vectors************************************************
1187  template< typename VT1 // Type of the left-hand side target vector
1188  , typename MT1 // Type of the left-hand side matrix operand
1189  , typename VT2 > // Type of the right-hand side vector operand
1190  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1191  {
1192  const size_t M( A.rows() );
1193  const size_t N( A.columns() );
1194 
1195  for( size_t j=0UL; j<N; ++j )
1196  {
1197  if( IsDiagonal<MT1>::value )
1198  {
1199  y[j] += A(j,j) * x[j];
1200  }
1201  else
1202  {
1203  const size_t ibegin( ( IsLower<MT1>::value )
1204  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1205  :( 0UL ) );
1206  const size_t iend( ( IsUpper<MT1>::value )
1207  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1208  :( M ) );
1209  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1210 
1211  const size_t inum( iend - ibegin );
1212  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1213 
1214  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1215  y[i ] += A(i ,j) * x[j];
1216  y[i+1UL] += A(i+1UL,j) * x[j];
1217  }
1218  if( ipos < iend ) {
1219  y[ipos] += A(ipos,j) * x[j];
1220  }
1221  }
1222  }
1223  }
1225  //**********************************************************************************************
1226 
1227  //**Default addition assignment to dense vectors (small matrices)*******************************
1241  template< typename VT1 // Type of the left-hand side target vector
1242  , typename MT1 // Type of the left-hand side matrix operand
1243  , typename VT2 > // Type of the right-hand side vector operand
1244  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1245  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1246  {
1247  selectDefaultAddAssignKernel( y, A, x );
1248  }
1250  //**********************************************************************************************
1251 
1252  //**Vectorized default addition assignment to dense vectors (small matrices)********************
1266  template< typename VT1 // Type of the left-hand side target vector
1267  , typename MT1 // Type of the left-hand side matrix operand
1268  , typename VT2 > // Type of the right-hand side vector operand
1269  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1270  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1271  {
1272  typedef IntrinsicTrait<ElementType> IT;
1273 
1274  const size_t M( A.rows() );
1275  const size_t N( A.columns() );
1276 
1277  size_t i( 0UL );
1278 
1279  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
1280  {
1281  const size_t jbegin( ( IsUpper<MT1>::value )
1282  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1283  :( 0UL ) );
1284  const size_t jend( ( IsLower<MT1>::value )
1285  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1286  :( N ) );
1287  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1288 
1289  IntrinsicType xmm1( y.load(i ) );
1290  IntrinsicType xmm2( y.load(i+IT::size ) );
1291  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1292  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1293  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1294  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1295  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1296  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1297 
1298  for( size_t j=jbegin; j<jend; ++j ) {
1299  const IntrinsicType x1( set( x[j] ) );
1300  xmm1 = xmm1 + A.load(i ,j) * x1;
1301  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1302  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1303  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1304  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1305  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1306  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1307  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1308  }
1309 
1310  y.store( i , xmm1 );
1311  y.store( i+IT::size , xmm2 );
1312  y.store( i+IT::size*2UL, xmm3 );
1313  y.store( i+IT::size*3UL, xmm4 );
1314  y.store( i+IT::size*4UL, xmm5 );
1315  y.store( i+IT::size*5UL, xmm6 );
1316  y.store( i+IT::size*6UL, xmm7 );
1317  y.store( i+IT::size*7UL, xmm8 );
1318  }
1319 
1320  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
1321  {
1322  const size_t jbegin( ( IsUpper<MT1>::value )
1323  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1324  :( 0UL ) );
1325  const size_t jend( ( IsLower<MT1>::value )
1326  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1327  :( N ) );
1328  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1329 
1330  IntrinsicType xmm1( y.load(i ) );
1331  IntrinsicType xmm2( y.load(i+IT::size ) );
1332  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1333  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1334 
1335  for( size_t j=jbegin; j<jend; ++j ) {
1336  const IntrinsicType x1( set( x[j] ) );
1337  xmm1 = xmm1 + A.load(i ,j) * x1;
1338  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1339  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1340  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1341  }
1342 
1343  y.store( i , xmm1 );
1344  y.store( i+IT::size , xmm2 );
1345  y.store( i+IT::size*2UL, xmm3 );
1346  y.store( i+IT::size*3UL, xmm4 );
1347  }
1348 
1349  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
1350  {
1351  const size_t jbegin( ( IsUpper<MT1>::value )
1352  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1353  :( 0UL ) );
1354  const size_t jend( ( IsLower<MT1>::value )
1355  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1356  :( N ) );
1357  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1358 
1359  IntrinsicType xmm1( y.load(i ) );
1360  IntrinsicType xmm2( y.load(i+IT::size ) );
1361  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1362 
1363  for( size_t j=jbegin; j<jend; ++j ) {
1364  const IntrinsicType x1( set( x[j] ) );
1365  xmm1 = xmm1 + A.load(i ,j) * x1;
1366  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1367  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1368  }
1369 
1370  y.store( i , xmm1 );
1371  y.store( i+IT::size , xmm2 );
1372  y.store( i+IT::size*2UL, xmm3 );
1373  }
1374 
1375  for( ; (i+IT::size) < M; i+=IT::size*2UL )
1376  {
1377  const size_t jbegin( ( IsUpper<MT1>::value )
1378  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1379  :( 0UL ) );
1380  const size_t jend( ( IsLower<MT1>::value )
1381  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1382  :( N ) );
1383  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1384 
1385  IntrinsicType xmm1( y.load(i ) );
1386  IntrinsicType xmm2( y.load(i+IT::size) );
1387 
1388  for( size_t j=jbegin; j<jend; ++j ) {
1389  const IntrinsicType x1( set( x[j] ) );
1390  xmm1 = xmm1 + A.load(i ,j) * x1;
1391  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1392  }
1393 
1394  y.store( i , xmm1 );
1395  y.store( i+IT::size, xmm2 );
1396  }
1397 
1398  if( i < M )
1399  {
1400  const size_t jbegin( ( IsUpper<MT1>::value )
1401  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1402  :( 0UL ) );
1403  const size_t jend( ( IsLower<MT1>::value )
1404  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1405  :( N ) );
1406  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1407 
1408  IntrinsicType xmm1( y.load(i) );
1409 
1410  for( size_t j=jbegin; j<jend; ++j ) {
1411  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1412  }
1413 
1414  y.store( i, xmm1 );
1415  }
1416  }
1418  //**********************************************************************************************
1419 
1420  //**Default addition assignment to dense vectors (large matrices)*******************************
1434  template< typename VT1 // Type of the left-hand side target vector
1435  , typename MT1 // Type of the left-hand side matrix operand
1436  , typename VT2 > // Type of the right-hand side vector operand
1437  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1438  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1439  {
1440  selectDefaultAddAssignKernel( y, A, x );
1441  }
1443  //**********************************************************************************************
1444 
1445  //**Vectorized default addition assignment to dense vectors (large matrices)********************
1459  template< typename VT1 // Type of the left-hand side target vector
1460  , typename MT1 // Type of the left-hand side matrix operand
1461  , typename VT2 > // Type of the right-hand side vector operand
1462  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1463  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1464  {
1465  typedef IntrinsicTrait<ElementType> IT;
1466 
1467  const size_t M( A.rows() );
1468  const size_t N( A.columns() );
1469 
1470  const size_t iblock( 32768UL / sizeof( ElementType ) );
1471  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1472 
1473  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
1474 
1475  for( size_t ii=0U; ii<M; ii+=iblock ) {
1476  for( size_t jj=0UL; jj<N; jj+=jblock )
1477  {
1478  const size_t jend( min( jj+jblock, N ) );
1479  const size_t itmp( min( ii+iblock, M ) );
1480  const size_t iend( ( IsUpper<MT1>::value )
1481  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1482  :( itmp ) );
1483 
1484  size_t i( ( IsLower<MT1>::value )
1485  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
1486  :( ii ) );
1487 
1488  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
1489  {
1490  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1491 
1492  for( size_t j=jj; j<jend; ++j ) {
1493  const IntrinsicType x1( set( x[j] ) );
1494  xmm1 = xmm1 + A.load(i ,j) * x1;
1495  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1496  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1497  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1498  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1499  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1500  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1501  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1502  }
1503 
1504  y.store( i , y.load(i ) + xmm1 );
1505  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1506  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1507  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
1508  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5 );
1509  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6 );
1510  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7 );
1511  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8 );
1512  }
1513 
1514  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
1515  {
1516  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1517 
1518  for( size_t j=jj; j<jend; ++j ) {
1519  const IntrinsicType x1( set( x[j] ) );
1520  xmm1 = xmm1 + A.load(i ,j) * x1;
1521  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1522  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1523  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1524  }
1525 
1526  y.store( i , y.load(i ) + xmm1 );
1527  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1528  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1529  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
1530  }
1531 
1532  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
1533  {
1534  IntrinsicType xmm1, xmm2, xmm3;
1535 
1536  for( size_t j=jj; j<jend; ++j ) {
1537  const IntrinsicType x1( set( x[j] ) );
1538  xmm1 = xmm1 + A.load(i ,j) * x1;
1539  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1540  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1541  }
1542 
1543  y.store( i , y.load(i ) + xmm1 );
1544  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1545  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1546  }
1547 
1548  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
1549  {
1550  IntrinsicType xmm1, xmm2;
1551 
1552  for( size_t j=jj; j<jend; ++j ) {
1553  const IntrinsicType x1( set( x[j] ) );
1554  xmm1 = xmm1 + A.load(i ,j) * x1;
1555  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1556  }
1557 
1558  y.store( i , y.load(i ) + xmm1 );
1559  y.store( i+IT::size, y.load(i+IT::size) + xmm2 );
1560  }
1561 
1562  if( i < iend )
1563  {
1564  IntrinsicType xmm1;
1565 
1566  for( size_t j=jj; j<jend; ++j ) {
1567  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1568  }
1569 
1570  y.store( i, y.load(i) + xmm1 );
1571  }
1572  }
1573  }
1574  }
1576  //**********************************************************************************************
1577 
1578  //**BLAS-based addition assignment to dense vectors (default)***********************************
1592  template< typename VT1 // Type of the left-hand side target vector
1593  , typename MT1 // Type of the left-hand side matrix operand
1594  , typename VT2 > // Type of the right-hand side vector operand
1595  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1596  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1597  {
1598  selectLargeAddAssignKernel( y, A, x );
1599  }
1601  //**********************************************************************************************
1602 
1603  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1604 #if BLAZE_BLAS_MODE
1605 
1618  template< typename VT1 // Type of the left-hand side target vector
1619  , typename MT1 // Type of the left-hand side matrix operand
1620  , typename VT2 > // Type of the right-hand side vector operand
1621  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1622  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1623  {
1624  if( IsTriangular<MT1>::value ) {
1625  typename VT1::ResultType tmp( x );
1626  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1627  addAssign( y, tmp );
1628  }
1629  else {
1630  sgemv( y, A, x, 1.0F, 1.0F );
1631  }
1632  }
1634 #endif
1635  //**********************************************************************************************
1636 
1637  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1638 #if BLAZE_BLAS_MODE
1639 
1652  template< typename VT1 // Type of the left-hand side target vector
1653  , typename MT1 // Type of the left-hand side matrix operand
1654  , typename VT2 > // Type of the right-hand side vector operand
1655  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1656  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1657  {
1658  if( IsTriangular<MT1>::value ) {
1659  typename VT1::ResultType tmp( x );
1660  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1661  addAssign( y, tmp );
1662  }
1663  else {
1664  dgemv( y, A, x, 1.0, 1.0 );
1665  }
1666  }
1668 #endif
1669  //**********************************************************************************************
1670 
1671  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1672 #if BLAZE_BLAS_MODE
1673 
1686  template< typename VT1 // Type of the left-hand side target vector
1687  , typename MT1 // Type of the left-hand side matrix operand
1688  , typename VT2 > // Type of the right-hand side vector operand
1689  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1690  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1691  {
1692  if( IsTriangular<MT1>::value ) {
1693  typename VT1::ResultType tmp( x );
1694  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1695  addAssign( y, tmp );
1696  }
1697  else {
1698  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1699  }
1700  }
1702 #endif
1703  //**********************************************************************************************
1704 
1705  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1706 #if BLAZE_BLAS_MODE
1707 
1720  template< typename VT1 // Type of the left-hand side target vector
1721  , typename MT1 // Type of the left-hand side matrix operand
1722  , typename VT2 > // Type of the right-hand side vector operand
1723  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1724  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1725  {
1726  if( IsTriangular<MT1>::value ) {
1727  typename VT1::ResultType tmp( x );
1728  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1729  addAssign( y, tmp );
1730  }
1731  else {
1732  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1733  }
1734  }
1736 #endif
1737  //**********************************************************************************************
1738 
1739  //**Addition assignment to sparse vectors*******************************************************
1740  // No special implementation for the addition assignment to sparse vectors.
1741  //**********************************************************************************************
1742 
1743  //**Subtraction assignment to dense vectors*****************************************************
1756  template< typename VT1 > // Type of the target dense vector
1757  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1758  {
1760 
1761  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1762 
1763  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1764  return;
1765  }
1766 
1767  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1768  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1769 
1770  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1771  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1772  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1773  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1774 
1775  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1776  }
1778  //**********************************************************************************************
1779 
1780  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1791  template< typename VT1 // Type of the left-hand side target vector
1792  , typename MT1 // Type of the left-hand side matrix operand
1793  , typename VT2 > // Type of the right-hand side vector operand
1794  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1795  {
1796  if( ( IsDiagonal<MT1>::value ) ||
1797  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1798  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1799  selectSmallSubAssignKernel( y, A, x );
1800  else
1801  selectBlasSubAssignKernel( y, A, x );
1802  }
1804  //**********************************************************************************************
1805 
1806  //**Default subtraction assignment to dense vectors*********************************************
1820  template< typename VT1 // Type of the left-hand side target vector
1821  , typename MT1 // Type of the left-hand side matrix operand
1822  , typename VT2 > // Type of the right-hand side vector operand
1823  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1824  {
1825  const size_t M( A.rows() );
1826  const size_t N( A.columns() );
1827 
1828  for( size_t j=0UL; j<N; ++j )
1829  {
1830  if( IsDiagonal<MT1>::value )
1831  {
1832  y[j] -= A(j,j) * x[j];
1833  }
1834  else
1835  {
1836  const size_t ibegin( ( IsLower<MT1>::value )
1837  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1838  :( 0UL ) );
1839  const size_t iend( ( IsUpper<MT1>::value )
1840  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1841  :( M ) );
1842  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1843 
1844  const size_t inum( iend - ibegin );
1845  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1846 
1847  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1848  y[i ] -= A(i ,j) * x[j];
1849  y[i+1UL] -= A(i+1UL,j) * x[j];
1850  }
1851  if( ipos < iend ) {
1852  y[ipos] -= A(ipos,j) * x[j];
1853  }
1854  }
1855  }
1856  }
1858  //**********************************************************************************************
1859 
1860  //**Default subtraction assignment to dense vectors (small matrices)****************************
1874  template< typename VT1 // Type of the left-hand side target vector
1875  , typename MT1 // Type of the left-hand side matrix operand
1876  , typename VT2 > // Type of the right-hand side vector operand
1877  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1878  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1879  {
1880  selectDefaultSubAssignKernel( y, A, x );
1881  }
1883  //**********************************************************************************************
1884 
1885  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
1900  template< typename VT1 // Type of the left-hand side target vector
1901  , typename MT1 // Type of the left-hand side matrix operand
1902  , typename VT2 > // Type of the right-hand side vector operand
1903  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1904  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1905  {
1906  typedef IntrinsicTrait<ElementType> IT;
1907 
1908  const size_t M( A.rows() );
1909  const size_t N( A.columns() );
1910 
1911  size_t i( 0UL );
1912 
1913  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
1914  {
1915  const size_t jbegin( ( IsUpper<MT1>::value )
1916  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1917  :( 0UL ) );
1918  const size_t jend( ( IsLower<MT1>::value )
1919  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1920  :( N ) );
1921  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1922 
1923  IntrinsicType xmm1( y.load(i ) );
1924  IntrinsicType xmm2( y.load(i+IT::size ) );
1925  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1926  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1927  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1928  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1929  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1930  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1931 
1932  for( size_t j=jbegin; j<jend; ++j ) {
1933  const IntrinsicType x1( set( x[j] ) );
1934  xmm1 = xmm1 - A.load(i ,j) * x1;
1935  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1936  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1937  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1938  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1939  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1940  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1941  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1942  }
1943 
1944  y.store( i , xmm1 );
1945  y.store( i+IT::size , xmm2 );
1946  y.store( i+IT::size*2UL, xmm3 );
1947  y.store( i+IT::size*3UL, xmm4 );
1948  y.store( i+IT::size*4UL, xmm5 );
1949  y.store( i+IT::size*5UL, xmm6 );
1950  y.store( i+IT::size*6UL, xmm7 );
1951  y.store( i+IT::size*7UL, xmm8 );
1952  }
1953 
1954  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
1955  {
1956  const size_t jbegin( ( IsUpper<MT1>::value )
1957  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1958  :( 0UL ) );
1959  const size_t jend( ( IsLower<MT1>::value )
1960  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1961  :( N ) );
1962  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1963 
1964  IntrinsicType xmm1( y.load(i ) );
1965  IntrinsicType xmm2( y.load(i+IT::size ) );
1966  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1967  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1968 
1969  for( size_t j=jbegin; j<jend; ++j ) {
1970  const IntrinsicType x1( set( x[j] ) );
1971  xmm1 = xmm1 - A.load(i ,j) * x1;
1972  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1973  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1974  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1975  }
1976 
1977  y.store( i , xmm1 );
1978  y.store( i+IT::size , xmm2 );
1979  y.store( i+IT::size*2UL, xmm3 );
1980  y.store( i+IT::size*3UL, xmm4 );
1981  }
1982 
1983  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
1984  {
1985  const size_t jbegin( ( IsUpper<MT1>::value )
1986  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1987  :( 0UL ) );
1988  const size_t jend( ( IsLower<MT1>::value )
1989  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1990  :( N ) );
1991  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1992 
1993  IntrinsicType xmm1( y.load(i ) );
1994  IntrinsicType xmm2( y.load(i+IT::size ) );
1995  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1996 
1997  for( size_t j=jbegin; j<jend; ++j ) {
1998  const IntrinsicType x1( set( x[j] ) );
1999  xmm1 = xmm1 - A.load(i ,j) * x1;
2000  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
2001  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
2002  }
2003 
2004  y.store( i , xmm1 );
2005  y.store( i+IT::size , xmm2 );
2006  y.store( i+IT::size*2UL, xmm3 );
2007  }
2008 
2009  for( ; (i+IT::size) < M; i+=IT::size*2UL )
2010  {
2011  const size_t jbegin( ( IsUpper<MT1>::value )
2012  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2013  :( 0UL ) );
2014  const size_t jend( ( IsLower<MT1>::value )
2015  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2016  :( N ) );
2017  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2018 
2019  IntrinsicType xmm1( y.load(i ) );
2020  IntrinsicType xmm2( y.load(i+IT::size) );
2021 
2022  for( size_t j=jbegin; j<jend; ++j ) {
2023  const IntrinsicType x1( set( x[j] ) );
2024  xmm1 = xmm1 - A.load(i ,j) * x1;
2025  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
2026  }
2027 
2028  y.store( i , xmm1 );
2029  y.store( i+IT::size, xmm2 );
2030  }
2031 
2032  if( i < M )
2033  {
2034  const size_t jbegin( ( IsUpper<MT1>::value )
2035  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2036  :( 0UL ) );
2037  const size_t jend( ( IsLower<MT1>::value )
2038  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2039  :( N ) );
2040  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2041 
2042  IntrinsicType xmm1( y.load(i) );
2043 
2044  for( size_t j=jbegin; j<jend; ++j ) {
2045  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
2046  }
2047 
2048  y.store( i, xmm1 );
2049  }
2050  }
2052  //**********************************************************************************************
2053 
2054  //**Default subtraction assignment to dense vectors (large matrices)****************************
2068  template< typename VT1 // Type of the left-hand side target vector
2069  , typename MT1 // Type of the left-hand side matrix operand
2070  , typename VT2 > // Type of the right-hand side vector operand
2071  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2072  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2073  {
2074  selectDefaultSubAssignKernel( y, A, x );
2075  }
2077  //**********************************************************************************************
2078 
2079  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
2094  template< typename VT1 // Type of the left-hand side target vector
2095  , typename MT1 // Type of the left-hand side matrix operand
2096  , typename VT2 > // Type of the right-hand side vector operand
2097  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2098  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2099  {
2100  typedef IntrinsicTrait<ElementType> IT;
2101 
2102  const size_t M( A.rows() );
2103  const size_t N( A.columns() );
2104 
2105  const size_t iblock( 32768UL / sizeof( ElementType ) );
2106  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
2107 
2108  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
2109 
2110  for( size_t ii=0U; ii<M; ii+=iblock ) {
2111  for( size_t jj=0UL; jj<N; jj+=jblock )
2112  {
2113  const size_t jend( min( jj+jblock, N ) );
2114  const size_t itmp( min( ii+iblock, M ) );
2115  const size_t iend( ( IsUpper<MT1>::value )
2116  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
2117  :( itmp ) );
2118 
2119  size_t i( ( IsLower<MT1>::value )
2120  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
2121  :( ii ) );
2122 
2123  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
2124  {
2125  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2126 
2127  for( size_t j=jj; j<jend; ++j ) {
2128  const IntrinsicType x1( set( x[j] ) );
2129  xmm1 = xmm1 + A.load(i ,j) * x1;
2130  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2131  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2132  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2133  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2134  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2135  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2136  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2137  }
2138 
2139  y.store( i , y.load(i ) - xmm1 );
2140  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
2141  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
2142  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4 );
2143  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5 );
2144  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6 );
2145  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7 );
2146  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8 );
2147  }
2148 
2149  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
2150  {
2151  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2152 
2153  for( size_t j=jj; j<jend; ++j ) {
2154  const IntrinsicType x1( set( x[j] ) );
2155  xmm1 = xmm1 + A.load(i ,j) * x1;
2156  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2157  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2158  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2159  }
2160 
2161  y.store( i , y.load(i ) - xmm1 );
2162  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
2163  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
2164  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4 );
2165  }
2166 
2167  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
2168  {
2169  IntrinsicType xmm1, xmm2, xmm3;
2170 
2171  for( size_t j=jj; j<jend; ++j ) {
2172  const IntrinsicType x1( set( x[j] ) );
2173  xmm1 = xmm1 + A.load(i ,j) * x1;
2174  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2175  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2176  }
2177 
2178  y.store( i , y.load(i ) - xmm1 );
2179  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
2180  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
2181  }
2182 
2183  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
2184  {
2185  IntrinsicType xmm1, xmm2;
2186 
2187  for( size_t j=jj; j<jend; ++j ) {
2188  const IntrinsicType x1( set( x[j] ) );
2189  xmm1 = xmm1 + A.load(i ,j) * x1;
2190  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2191  }
2192 
2193  y.store( i , y.load(i ) - xmm1 );
2194  y.store( i+IT::size, y.load(i+IT::size) - xmm2 );
2195  }
2196 
2197  if( i < iend )
2198  {
2199  IntrinsicType xmm1;
2200 
2201  for( size_t j=jj; j<jend; ++j ) {
2202  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2203  }
2204 
2205  y.store( i, y.load(i) - xmm1 );
2206  }
2207  }
2208  }
2209  }
2211  //**********************************************************************************************
2212 
2213  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2227  template< typename VT1 // Type of the left-hand side target vector
2228  , typename MT1 // Type of the left-hand side matrix operand
2229  , typename VT2 > // Type of the right-hand side vector operand
2230  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
2231  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2232  {
2233  selectLargeSubAssignKernel( y, A, x );
2234  }
2236  //**********************************************************************************************
2237 
2238  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based subtraction assignment of a transpose dense matrix-dense vector
//        multiplication for single precision operands.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \return void
//
// This overload is enabled whenever \a UseSinglePrecisionKernel holds for the given operand
// types. Non-triangular matrices are handled by a single sgemv() call; triangular matrices are
// handled by strmv() on a temporary copy of \a x followed by a subtraction assignment.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
   selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   // General case: the factors -1 and 1 are presumably GEMV's alpha and beta (y = -A*x + y)
   if( !IsTriangular<MT1>::value ) {
      sgemv( y, A, x, -1.0F, 1.0F );
      return;
   }

   // Triangular case: strmv() works on a copy of x, the result is subtracted afterwards
   typename VT1::ResultType product( x );
   strmv( product, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
   subAssign( y, product );
}
#endif
2270  //**********************************************************************************************
2271 
2272  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based subtraction assignment of a transpose dense matrix-dense vector
//        multiplication for double precision operands.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \return void
//
// This overload is enabled whenever \a UseDoublePrecisionKernel holds for the given operand
// types. Non-triangular matrices are handled by a single dgemv() call; triangular matrices are
// handled by dtrmv() on a temporary copy of \a x followed by a subtraction assignment.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
   selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   // General case: the factors -1 and 1 are presumably GEMV's alpha and beta (y = -A*x + y)
   if( !IsTriangular<MT1>::value ) {
      dgemv( y, A, x, -1.0, 1.0 );
      return;
   }

   // Triangular case: dtrmv() works on a copy of x, the result is subtracted afterwards
   typename VT1::ResultType product( x );
   dtrmv( product, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
   subAssign( y, product );
}
#endif
2304  //**********************************************************************************************
2305 
2306  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based subtraction assignment of a transpose dense matrix-dense vector
//        multiplication for single precision complex operands.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \return void
//
// This overload is enabled whenever \a UseSinglePrecisionComplexKernel holds for the given
// operand types. Non-triangular matrices are handled by a single cgemv() call; triangular
// matrices are handled by ctrmv() on a temporary copy of \a x followed by a subtraction
// assignment.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   // General case: the factors (-1,0) and (1,0) are presumably GEMV's alpha and beta
   if( !IsTriangular<MT1>::value ) {
      cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
      return;
   }

   // Triangular case: ctrmv() works on a copy of x, the result is subtracted afterwards
   typename VT1::ResultType product( x );
   ctrmv( product, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
   subAssign( y, product );
}
#endif
2338  //**********************************************************************************************
2339 
2340  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based subtraction assignment of a transpose dense matrix-dense vector
//        multiplication for double precision complex operands.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \return void
//
// This overload is enabled whenever \a UseDoublePrecisionComplexKernel holds for the given
// operand types. Non-triangular matrices are handled by a single zgemv() call; triangular
// matrices are handled by ztrmv() on a temporary copy of \a x followed by a subtraction
// assignment.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   // General case: the factors (-1,0) and (1,0) are presumably GEMV's alpha and beta
   if( !IsTriangular<MT1>::value ) {
      zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
      return;
   }

   // Triangular case: ztrmv() works on a copy of x, the result is subtracted afterwards
   typename VT1::ResultType product( x );
   ztrmv( product, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
   subAssign( y, product );
}
#endif
2372  //**********************************************************************************************
2373 
2374  //**Subtraction assignment to sparse vectors****************************************************
2375  // No special implementation for the subtraction assignment to sparse vectors.
2376  //**********************************************************************************************
2377 
2378  //**Multiplication assignment to dense vectors**************************************************
2391  template< typename VT1 > // Type of the target dense vector
2392  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2393  {
2395 
2399 
2400  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2401 
2402  const ResultType tmp( serial( rhs ) );
2403  multAssign( ~lhs, tmp );
2404  }
2406  //**********************************************************************************************
2407 
2408  //**Multiplication assignment to sparse vectors*************************************************
2409  // No special implementation for the multiplication assignment to sparse vectors.
2410  //**********************************************************************************************
2411 
2412  //**SMP assignment to dense vectors*************************************************************
2427  template< typename VT1 > // Type of the target dense vector
2428  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2429  smpAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2430  {
2432 
2433  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2434 
2435  if( rhs.mat_.rows() == 0UL ) {
2436  return;
2437  }
2438  else if( rhs.mat_.columns() == 0UL ) {
2439  reset( ~lhs );
2440  return;
2441  }
2442 
2443  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2444  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2445 
2446  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2447  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2448  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2449  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2450 
2451  smpAssign( ~lhs, A * x );
2452  }
2454  //**********************************************************************************************
2455 
2456  //**SMP assignment to sparse vectors************************************************************
2471  template< typename VT1 > // Type of the target sparse vector
2472  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2473  smpAssign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2474  {
2476 
2480 
2481  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2482 
2483  const ResultType tmp( rhs );
2484  smpAssign( ~lhs, tmp );
2485  }
2487  //**********************************************************************************************
2488 
2489  //**SMP addition assignment to dense vectors****************************************************
2504  template< typename VT1 > // Type of the target dense vector
2505  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2506  smpAddAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2507  {
2509 
2510  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2511 
2512  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2513  return;
2514  }
2515 
2516  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2517  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2518 
2519  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2520  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2521  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2522  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2523 
2524  smpAddAssign( ~lhs, A * x );
2525  }
2527  //**********************************************************************************************
2528 
2529  //**SMP addition assignment to sparse vectors***************************************************
2530  // No special implementation for the SMP addition assignment to sparse vectors.
2531  //**********************************************************************************************
2532 
2533  //**SMP subtraction assignment to dense vectors*************************************************
2548  template< typename VT1 > // Type of the target dense vector
2549  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2550  smpSubAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2551  {
2553 
2554  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2555 
2556  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2557  return;
2558  }
2559 
2560  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2561  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2562 
2563  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2564  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2565  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2566  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2567 
2568  smpSubAssign( ~lhs, A * x );
2569  }
2571  //**********************************************************************************************
2572 
2573  //**SMP subtraction assignment to sparse vectors************************************************
2574  // No special implementation for the SMP subtraction assignment to sparse vectors.
2575  //**********************************************************************************************
2576 
2577  //**SMP multiplication assignment to dense vectors**********************************************
2592  template< typename VT1 > // Type of the target dense vector
2593  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2594  smpMultAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2595  {
2597 
2601 
2602  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2603 
2604  const ResultType tmp( rhs );
2605  smpMultAssign( ~lhs, tmp );
2606  }
2608  //**********************************************************************************************
2609 
2610  //**SMP multiplication assignment to sparse vectors*********************************************
2611  // No special implementation for the SMP multiplication assignment to sparse vectors.
2612  //**********************************************************************************************
2613 
2614  //**Compile time checks*************************************************************************
2622  //**********************************************************************************************
2623 };
2624 //*************************************************************************************************
2625 
2626 
2627 
2628 
2629 //=================================================================================================
2630 //
2631 // DVECSCALARMULTEXPR SPECIALIZATION
2632 //
2633 //=================================================================================================
2634 
2635 //*************************************************************************************************
2644 template< typename MT // Type of the left-hand side dense matrix
2645  , typename VT // Type of the right-hand side dense vector
2646  , typename ST > // Type of the side scalar value
2647 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
2648  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
2649  , private VecScalarMultExpr
2650  , private Computation
2651 {
2652  private:
2653  //**Type definitions****************************************************************************
2654  typedef TDMatDVecMultExpr<MT,VT> MVM;
2655  typedef typename MVM::ResultType RES;
2656  typedef typename MT::ResultType MRT;
2657  typedef typename VT::ResultType VRT;
2658  typedef typename MRT::ElementType MET;
2659  typedef typename VRT::ElementType VET;
2660  typedef typename MT::CompositeType MCT;
2661  typedef typename VT::CompositeType VCT;
2662  //**********************************************************************************************
2663 
2664  //**********************************************************************************************
2666  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2667  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2668  //**********************************************************************************************
2669 
2670  //**********************************************************************************************
2672  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2673  //**********************************************************************************************
2674 
2675  //**********************************************************************************************
2677 
2680  template< typename T1 >
2681  struct UseSMPAssign {
2682  enum { value = ( evaluateMatrix || evaluateVector ) };
2683  };
2684  //**********************************************************************************************
2685 
2686  //**********************************************************************************************
2688 
2691  template< typename T1, typename T2, typename T3, typename T4 >
2692  struct UseSinglePrecisionKernel {
2693  enum { value = BLAZE_BLAS_MODE &&
2694  HasMutableDataAccess<T1>::value &&
2695  HasConstDataAccess<T2>::value &&
2696  HasConstDataAccess<T3>::value &&
2697  !IsDiagonal<T2>::value &&
2698  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2699  IsFloat<typename T1::ElementType>::value &&
2700  IsFloat<typename T2::ElementType>::value &&
2701  IsFloat<typename T3::ElementType>::value &&
2702  !IsComplex<T4>::value };
2703  };
2704  //**********************************************************************************************
2705 
2706  //**********************************************************************************************
2708 
2711  template< typename T1, typename T2, typename T3, typename T4 >
2712  struct UseDoublePrecisionKernel {
2713  enum { value = BLAZE_BLAS_MODE &&
2714  HasMutableDataAccess<T1>::value &&
2715  HasConstDataAccess<T2>::value &&
2716  HasConstDataAccess<T3>::value &&
2717  !IsDiagonal<T2>::value &&
2718  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2719  IsDouble<typename T1::ElementType>::value &&
2720  IsDouble<typename T2::ElementType>::value &&
2721  IsDouble<typename T3::ElementType>::value &&
2722  !IsComplex<T4>::value };
2723  };
2724  //**********************************************************************************************
2725 
2726  //**********************************************************************************************
2728 
2731  template< typename T1, typename T2, typename T3 >
2732  struct UseSinglePrecisionComplexKernel {
2733  typedef complex<float> Type;
2734  enum { value = BLAZE_BLAS_MODE &&
2735  HasMutableDataAccess<T1>::value &&
2736  HasConstDataAccess<T2>::value &&
2737  HasConstDataAccess<T3>::value &&
2738  !IsDiagonal<T2>::value &&
2739  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2740  IsSame<typename T1::ElementType,Type>::value &&
2741  IsSame<typename T2::ElementType,Type>::value &&
2742  IsSame<typename T3::ElementType,Type>::value };
2743  };
2744  //**********************************************************************************************
2745 
2746  //**********************************************************************************************
2748 
2751  template< typename T1, typename T2, typename T3 >
2752  struct UseDoublePrecisionComplexKernel {
2753  typedef complex<double> Type;
2754  enum { value = BLAZE_BLAS_MODE &&
2755  HasMutableDataAccess<T1>::value &&
2756  HasConstDataAccess<T2>::value &&
2757  HasConstDataAccess<T3>::value &&
2758  !IsDiagonal<T2>::value &&
2759  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2760  IsSame<typename T1::ElementType,Type>::value &&
2761  IsSame<typename T2::ElementType,Type>::value &&
2762  IsSame<typename T3::ElementType,Type>::value };
2763  };
2764  //**********************************************************************************************
2765 
2766  //**********************************************************************************************
2768 
2770  template< typename T1, typename T2, typename T3, typename T4 >
2771  struct UseDefaultKernel {
2772  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2773  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2774  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2775  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2776  };
2777  //**********************************************************************************************
2778 
2779  //**********************************************************************************************
2781 
2784  template< typename T1, typename T2, typename T3, typename T4 >
2785  struct UseVectorizedDefaultKernel {
2786  enum { value = !IsDiagonal<T2>::value &&
2787  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2788  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2789  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2790  IsSame<typename T1::ElementType,T4>::value &&
2791  IntrinsicTrait<typename T1::ElementType>::addition &&
2792  IntrinsicTrait<typename T1::ElementType>::multiplication };
2793  };
2794  //**********************************************************************************************
2795 
2796  public:
2797  //**Type definitions****************************************************************************
2798  typedef DVecScalarMultExpr<MVM,ST,false> This;
2799  typedef typename MultTrait<RES,ST>::Type ResultType;
2800  typedef typename ResultType::TransposeType TransposeType;
2801  typedef typename ResultType::ElementType ElementType;
2802  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2803  typedef const ElementType ReturnType;
2804  typedef const ResultType CompositeType;
2805 
2807  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
2808 
2810  typedef ST RightOperand;
2811 
2813  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
2814 
2816  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
2817  //**********************************************************************************************
2818 
2819  //**Compilation flags***************************************************************************
2821  enum { vectorizable = !IsDiagonal<MT>::value &&
2822  MT::vectorizable && VT::vectorizable &&
2823  IsSame<MET,VET>::value &&
2824  IsSame<MET,ST>::value &&
2825  IntrinsicTrait<MET>::addition &&
2826  IntrinsicTrait<MET>::multiplication };
2827 
2829  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2830  !evaluateVector && VT::smpAssignable };
2831  //**********************************************************************************************
2832 
2833  //**Constructor*********************************************************************************
2839  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2840  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2841  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2842  {}
2843  //**********************************************************************************************
2844 
2845  //**Subscript operator**************************************************************************
2851  inline ReturnType operator[]( size_t index ) const {
2852  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2853  return vector_[index] * scalar_;
2854  }
2855  //**********************************************************************************************
2856 
2857  //**Size function*******************************************************************************
2862  inline size_t size() const {
2863  return vector_.size();
2864  }
2865  //**********************************************************************************************
2866 
2867  //**Left operand access*************************************************************************
2872  inline LeftOperand leftOperand() const {
2873  return vector_;
2874  }
2875  //**********************************************************************************************
2876 
2877  //**Right operand access************************************************************************
2882  inline RightOperand rightOperand() const {
2883  return scalar_;
2884  }
2885  //**********************************************************************************************
2886 
2887  //**********************************************************************************************
2893  template< typename T >
2894  inline bool canAlias( const T* alias ) const {
2895  return vector_.canAlias( alias );
2896  }
2897  //**********************************************************************************************
2898 
2899  //**********************************************************************************************
2905  template< typename T >
2906  inline bool isAliased( const T* alias ) const {
2907  return vector_.isAliased( alias );
2908  }
2909  //**********************************************************************************************
2910 
2911  //**********************************************************************************************
2916  inline bool isAligned() const {
2917  return vector_.isAligned();
2918  }
2919  //**********************************************************************************************
2920 
2921  //**********************************************************************************************
2926  inline bool canSMPAssign() const {
2927  typename MVM::LeftOperand A( vector_.leftOperand() );
2928  return ( !BLAZE_BLAS_IS_PARALLEL ||
2929  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2930  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2932  }
2933  //**********************************************************************************************
2934 
2935  private:
2936  //**Member variables****************************************************************************
2937  LeftOperand vector_;
2938  RightOperand scalar_;
2939  //**********************************************************************************************
2940 
2941  //**Assignment to dense vectors*****************************************************************
2953  template< typename VT1 > // Type of the target dense vector
2954  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2955  {
2957 
2958  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2959 
2960  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2961  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2962 
2963  if( left.rows() == 0UL ) {
2964  return;
2965  }
2966  else if( left.columns() == 0UL ) {
2967  reset( ~lhs );
2968  return;
2969  }
2970 
2971  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2972  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2973 
2974  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2975  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2976  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2977  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2978 
2979  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2980  }
2981  //**********************************************************************************************
2982 
2983  //**Assignment to dense vectors (kernel selection)**********************************************
2994  template< typename VT1 // Type of the left-hand side target vector
2995  , typename MT1 // Type of the left-hand side matrix operand
2996  , typename VT2 // Type of the right-hand side vector operand
2997  , typename ST2 > // Type of the scalar value
2998  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2999  {
3000  if( ( IsDiagonal<MT1>::value ) ||
3001  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3002  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3003  selectSmallAssignKernel( y, A, x, scalar );
3004  else
3005  selectBlasAssignKernel( y, A, x, scalar );
3006  }
3007  //**********************************************************************************************
3008 
3009  //**Default assignment to dense vectors*********************************************************
/*!\brief Default (element-wise, non-vectorized) assignment kernel for a scaled transpose dense
//        matrix-dense vector multiplication.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The kernel runs in three order-dependent phases: (1) the elements of \a y receive their first
// contribution by plain assignment, (2) the remaining columns of \a A are accumulated column by
// column, and (3) the accumulated elements are multiplied by \a scalar. The IsLower/IsUpper/
// IsStrictly.../IsDiagonal traits of \a MT1 narrow the row range touched per column.
*/
3023  template< typename VT1 // Type of the left-hand side target vector
3024  , typename MT1 // Type of the left-hand side matrix operand
3025  , typename VT2 // Type of the right-hand side vector operand
3026  , typename ST2 > // Type of the scalar value
3027  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3028  {
3029  const size_t M( A.rows() );
3030  const size_t N( A.columns() );
3031 
// Strictly lower matrices: the first element gets no contribution and is reset explicitly.
3032  if( IsStrictlyLower<MT1>::value ) {
3033  reset( y[0] );
3034  }
3035 
// Phase 1 (non-upper matrices): initialize y from column 0 by assignment, so that the
// accumulation below can start at column 1. The reset first row of a strictly lower
// matrix is skipped.
3036  if( !IsUpper<MT1>::value )
3037  {
3038  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
3039  y[i] = A(i,0UL) * x[0UL];
3040  }
3041  }
3042 
// Phase 2: column-wise accumulation. Only (non-strictly) upper matrices start at column 0,
// since they skipped phase 1; all other matrices start at column 1.
3043  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
3044  {
3045  if( IsDiagonal<MT1>::value )
3046  {
// Diagonal matrices: one product per element, scaled immediately (phase 3 is skipped).
3047  y[j] = A(j,j) * x[j] * scalar;
3048  }
3049  else
3050  {
// Row range [ibegin,iend) accumulated for column j: lower matrices start at the diagonal
// (one below it if strictly lower), upper matrices stop before the row that is assigned
// separately below.
3051  const size_t ibegin( ( IsLower<MT1>::value )
3052  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3053  :( 0UL ) );
3054  const size_t iend( ( IsUpper<MT1>::value )
3055  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
3056  :( M ) );
3057  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3058 
// inum & size_t(-2) clears the lowest bit, i.e. rounds the row count down to an even number,
// so the loop below can process two rows per iteration.
3059  const size_t inum( iend - ibegin );
3060  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
3061 
3062  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3063  y[i ] += A(i ,j) * x[j];
3064  y[i+1UL] += A(i+1UL,j) * x[j];
3065  }
// Remainder row in case of an odd row count.
3066  if( ipos < iend ) {
3067  y[ipos] += A(ipos,j) * x[j];
3068  }
// Upper matrices: row iend receives its first contribution in this column, hence a plain
// assignment instead of an accumulation.
3069  if( IsUpper<MT1>::value ) {
3070  y[iend] = A(iend,j) * x[j];
3071  }
3072  }
3073  }
3074 
// Strictly upper matrices: the last element gets no contribution and is reset explicitly.
3075  if( IsStrictlyUpper<MT1>::value ) {
3076  reset( y[M-1UL] );
3077  }
3078 
// Phase 3: scale the accumulated elements. The explicitly reset first/last element of a
// strictly lower/upper matrix is excluded from the range.
3079  if( !IsDiagonal<MT1>::value )
3080  {
3081  const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
3082  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
3083  y[i] *= scalar;
3084  }
3085  }
3086  }
3087  //**********************************************************************************************
3088 
3089  //**Default assignment to dense vectors (small matrices)****************************************
3103  template< typename VT1 // Type of the left-hand side target vector
3104  , typename MT1 // Type of the left-hand side matrix operand
3105  , typename VT2 // Type of the right-hand side vector operand
3106  , typename ST2 > // Type of the scalar value
3107  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3108  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3109  {
3110  selectDefaultAssignKernel( y, A, x, scalar );
3111  }
3112  //**********************************************************************************************
3113 
3114  //**Vectorized default assignment to dense vectors (small matrices)*****************************
/*!\brief Vectorized small-matrix assignment kernel for a scaled transpose dense matrix-dense
//        vector multiplication.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is enabled whenever \a UseVectorizedDefaultKernel holds for the given types.
// The rows of \a A are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements each. For every block the products of the loaded matrix packets with the
// current element of \a x are accumulated over the relevant columns; the accumulators are then
// multiplied by the scaling factor and stored to \a y (overwriting its previous content).
//
// NOTE(review): the accumulators are default-constructed and used without explicit
// initialization, so the kernel relies on IntrinsicType's default constructor yielding zero --
// TODO confirm. The last block may load/store up to IT::size elements starting at a row index
// below M, which presumably relies on padded storage -- TODO confirm.
*/
3128  template< typename VT1 // Type of the left-hand side target vector
3129  , typename MT1 // Type of the left-hand side matrix operand
3130  , typename VT2 // Type of the right-hand side vector operand
3131  , typename ST2 > // Type of the scalar value
3132  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3133  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3134  {
3135  typedef IntrinsicTrait<ElementType> IT;
3136 
3137  const size_t M( A.rows() );
3138  const size_t N( A.columns() );
3139 
// Scaling factor as an intrinsic vector (set() presumably broadcasts the scalar to all lanes).
3140  const IntrinsicType factor( set( scalar ) );
3141 
// Row index shared by all block loops below; each loop continues where the previous stopped.
3142  size_t i( 0UL );
3143 
// Blocks of 8 intrinsic vectors (8*IT::size rows).
3144  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
3145  {
// Column range for this row block: upper matrices start at (or right after) column i,
// lower matrices end at the last column that can hit the block (minus one if strictly lower).
3146  const size_t jbegin( ( IsUpper<MT1>::value )
3147  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3148  :( 0UL ) );
3149  const size_t jend( ( IsLower<MT1>::value )
3150  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3151  :( N ) );
3152  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3153 
3154  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3155 
3156  for( size_t j=jbegin; j<jend; ++j ) {
3157  const IntrinsicType x1( set( x[j] ) );
3158  xmm1 = xmm1 + A.load(i ,j) * x1;
3159  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3160  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3161  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3162  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3163  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3164  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3165  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3166  }
3167 
// Scale the accumulated sums and write them to the target.
3168  y.store( i , xmm1*factor );
3169  y.store( i+IT::size , xmm2*factor );
3170  y.store( i+IT::size*2UL, xmm3*factor );
3171  y.store( i+IT::size*3UL, xmm4*factor );
3172  y.store( i+IT::size*4UL, xmm5*factor );
3173  y.store( i+IT::size*5UL, xmm6*factor );
3174  y.store( i+IT::size*6UL, xmm7*factor );
3175  y.store( i+IT::size*7UL, xmm8*factor );
3176  }
3177 
// Blocks of 4 intrinsic vectors.
3178  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
3179  {
3180  const size_t jbegin( ( IsUpper<MT1>::value )
3181  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3182  :( 0UL ) );
3183  const size_t jend( ( IsLower<MT1>::value )
3184  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3185  :( N ) );
3186  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3187 
3188  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3189 
3190  for( size_t j=jbegin; j<jend; ++j ) {
3191  const IntrinsicType x1( set( x[j] ) );
3192  xmm1 = xmm1 + A.load(i ,j) * x1;
3193  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3194  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3195  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3196  }
3197 
3198  y.store( i , xmm1*factor );
3199  y.store( i+IT::size , xmm2*factor );
3200  y.store( i+IT::size*2UL, xmm3*factor );
3201  y.store( i+IT::size*3UL, xmm4*factor );
3202  }
3203 
// Blocks of 3 intrinsic vectors.
3204  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
3205  {
3206  const size_t jbegin( ( IsUpper<MT1>::value )
3207  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3208  :( 0UL ) );
3209  const size_t jend( ( IsLower<MT1>::value )
3210  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3211  :( N ) );
3212  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3213 
3214  IntrinsicType xmm1, xmm2, xmm3;
3215 
3216  for( size_t j=jbegin; j<jend; ++j ) {
3217  const IntrinsicType x1( set( x[j] ) );
3218  xmm1 = xmm1 + A.load(i ,j) * x1;
3219  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3220  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3221  }
3222 
3223  y.store( i , xmm1*factor );
3224  y.store( i+IT::size , xmm2*factor );
3225  y.store( i+IT::size*2UL, xmm3*factor );
3226  }
3227 
// Blocks of 2 intrinsic vectors.
3228  for( ; (i+IT::size) < M; i+=IT::size*2UL )
3229  {
3230  const size_t jbegin( ( IsUpper<MT1>::value )
3231  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3232  :( 0UL ) );
3233  const size_t jend( ( IsLower<MT1>::value )
3234  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3235  :( N ) );
3236  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3237 
3238  IntrinsicType xmm1, xmm2;
3239 
3240  for( size_t j=jbegin; j<jend; ++j ) {
3241  const IntrinsicType x1( set( x[j] ) );
3242  xmm1 = xmm1 + A.load(i ,j) * x1;
3243  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3244  }
3245 
3246  y.store( i , xmm1*factor );
3247  y.store( i+IT::size, xmm2*factor );
3248  }
3249 
// Final block of a single intrinsic vector for the remaining rows.
3250  if( i < M )
3251  {
3252  const size_t jbegin( ( IsUpper<MT1>::value )
3253  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3254  :( 0UL ) );
3255  const size_t jend( ( IsLower<MT1>::value )
3256  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3257  :( N ) );
3258  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3259 
3260  IntrinsicType xmm1;
3261 
3262  for( size_t j=jbegin; j<jend; ++j ) {
3263  const IntrinsicType x1( set( x[j] ) );
3264  xmm1 = xmm1 + A.load(i,j) * x1;
3265  }
3266 
3267  y.store( i, xmm1*factor );
3268  }
3269  }
3270  //**********************************************************************************************
3271 
3272  //**Default assignment to dense vectors (large matrices)****************************************
// Large-matrix assignment kernel, non-vectorized fallback.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// This overload is selected through DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >
// and only forwards all four arguments unchanged to selectDefaultAssignKernel().
3286  template< typename VT1 // Type of the left-hand side target vector
3287  , typename MT1 // Type of the left-hand side matrix operand
3288  , typename VT2 // Type of the right-hand side vector operand
3289  , typename ST2 > // Type of the scalar value
3290  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3291  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3292  {
3293  selectDefaultAssignKernel( y, A, x, scalar );
3294  }
3295  //**********************************************************************************************
3296 
3297  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Large-matrix assignment kernel, vectorized and tiled.
//
// y       target vector; reset first and then used as the accumulator
// A       matrix operand, read through A.load(i,j)
// x       vector operand, read element-wise through x[j]
// scalar  scaling factor applied to every partial sum
//
// This overload is selected through EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >.
// The matrix is walked in tiles of iblock rows by jblock columns. Within a tile the rows are
// processed in strips of 8, 4, 3, 2 and finally 1 times IT::size rows. For every strip the
// products A.load(i,j) * set(x[j]) are summed over the tile's columns, multiplied by the
// scalar and added onto the current contents of y.
//
// NOTE(review): the accumulators are declared without an initializer; this presumably relies
// on IntrinsicType default-constructing to zero - confirm.
3311  template< typename VT1 // Type of the left-hand side target vector
3312  , typename MT1 // Type of the left-hand side matrix operand
3313  , typename VT2 // Type of the right-hand side vector operand
3314  , typename ST2 > // Type of the scalar value
3315  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3316  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3317  {
3318  typedef IntrinsicTrait<ElementType> IT;
3319 
3320  const size_t M( A.rows() );
3321  const size_t N( A.columns() );
3322 
      // Row tile height: the number of elements that fit into 32768 bytes.
3323  const size_t iblock( 32768UL / sizeof( ElementType ) );
      // Column tile width: 8 columns when N is smaller than iblock, otherwise 4.
3324  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3325 
      // The scaling factor converted to an IntrinsicType via set().
3326  const IntrinsicType factor( set( scalar ) );
3327 
3328  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
3329 
      // y is accumulated into by every column tile below, so it is cleared up front.
3330  reset( y );
3331 
3332  for( size_t ii=0U; ii<M; ii+=iblock ) {
3333  for( size_t jj=0UL; jj<N; jj+=jblock )
3334  {
3335  const size_t jend( min( jj+jblock, N ) );
3336  const size_t itmp( min( ii+iblock, M ) );
      // For upper matrices the row range of the tile is cut at jend (jend-1 if strictly upper).
3337  const size_t iend( ( IsUpper<MT1>::value )
3338  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3339  :( itmp ) );
3340 
      // For lower matrices the first row is raised to jj (jj+1 if strictly lower), masked with
      // size_t(-IT::size). Presumably this rounds down to a multiple of IT::size, which would
      // require IT::size to be a power of two - TODO confirm.
3341  size_t i( ( IsLower<MT1>::value )
3342  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
3343  :( ii ) );
3344 
      // Strips of 8*IT::size rows: one accumulator per IT::size rows, summed over columns jj..jend-1.
3345  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
3346  {
3347  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3348 
3349  for( size_t j=jj; j<jend; ++j ) {
3350  const IntrinsicType x1( set( x[j] ) );
3351  xmm1 = xmm1 + A.load(i ,j) * x1;
3352  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3353  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3354  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3355  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3356  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3357  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3358  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3359  }
3360 
      // Scale the partial sums and add them onto what y already holds from earlier column tiles.
3361  y.store( i , y.load(i ) + xmm1*factor );
3362  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3363  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3364  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3365  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3366  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3367  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3368  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3369  }
3370 
      // Strips of 4*IT::size rows.
3371  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
3372  {
3373  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3374 
3375  for( size_t j=jj; j<jend; ++j ) {
3376  const IntrinsicType x1( set( x[j] ) );
3377  xmm1 = xmm1 + A.load(i ,j) * x1;
3378  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3379  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3380  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3381  }
3382 
3383  y.store( i , y.load(i ) + xmm1*factor );
3384  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3385  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3386  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3387  }
3388 
      // Strips of 3*IT::size rows.
3389  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
3390  {
3391  IntrinsicType xmm1, xmm2, xmm3;
3392 
3393  for( size_t j=jj; j<jend; ++j ) {
3394  const IntrinsicType x1( set( x[j] ) );
3395  xmm1 = xmm1 + A.load(i ,j) * x1;
3396  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3397  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3398  }
3399 
3400  y.store( i , y.load(i ) + xmm1*factor );
3401  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3402  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3403  }
3404 
      // Strips of 2*IT::size rows.
3405  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
3406  {
3407  IntrinsicType xmm1, xmm2;
3408 
3409  for( size_t j=jj; j<jend; ++j ) {
3410  const IntrinsicType x1( set( x[j] ) );
3411  xmm1 = xmm1 + A.load(i ,j) * x1;
3412  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3413  }
3414 
3415  y.store( i , y.load(i ) + xmm1*factor );
3416  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
3417  }
3418 
      // Remaining rows: one more IT::size-wide load/store at i.
      // NOTE(review): if fewer than IT::size rows remain this touches elements at or beyond
      // iend - presumably covered by padding of y and A; confirm.
3419  if( i < iend )
3420  {
3421  IntrinsicType xmm1;
3422 
3423  for( size_t j=jj; j<jend; ++j ) {
3424  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3425  }
3426 
3427  y.store( i, y.load(i) + xmm1*factor );
3428  }
3429  }
3430  }
3431  }
3432  //**********************************************************************************************
3433 
3434  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel, default overload.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// This overload is selected through EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> > and makes
// no BLAS call itself: it forwards all four arguments unchanged to selectLargeAssignKernel().
3448  template< typename VT1 // Type of the left-hand side target vector
3449  , typename MT1 // Type of the left-hand side matrix operand
3450  , typename VT2 // Type of the right-hand side vector operand
3451  , typename ST2 > // Type of the scalar value
3452  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3453  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3454  {
3455  selectLargeAssignKernel( y, A, x, scalar );
3456  }
3457  //**********************************************************************************************
3458 
3459  //**BLAS-based assignment to dense vectors (single precision)***********************************
3460 #if BLAZE_BLAS_MODE
3461 
3474  template< typename VT1 // Type of the left-hand side target vector
3475  , typename MT1 // Type of the left-hand side matrix operand
3476  , typename VT2 // Type of the right-hand side vector operand
3477  , typename ST2 > // Type of the scalar value
3478  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3479  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3480  {
3481  if( IsTriangular<MT1>::value ) {
3482  assign( y, scalar * x );
3483  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3484  }
3485  else {
3486  sgemv( y, A, x, scalar, 0.0F );
3487  }
3488  }
3489 #endif
3490  //**********************************************************************************************
3491 
3492  //**BLAS-based assignment to dense vectors (double precision)***********************************
3493 #if BLAZE_BLAS_MODE
3494 
3507  template< typename VT1 // Type of the left-hand side target vector
3508  , typename MT1 // Type of the left-hand side matrix operand
3509  , typename VT2 // Type of the right-hand side vector operand
3510  , typename ST2 > // Type of the scalar value
3511  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3512  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3513  {
3514  if( IsTriangular<MT1>::value ) {
3515  assign( y, scalar * x );
3516  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3517  }
3518  else {
3519  dgemv( y, A, x, scalar, 0.0 );
3520  }
3521  }
3522 #endif
3523  //**********************************************************************************************
3524 
3525  //**BLAS-based assignment to dense vectors (single precision complex)***************************
3526 #if BLAZE_BLAS_MODE
3527 
3540  template< typename VT1 // Type of the left-hand side target vector
3541  , typename MT1 // Type of the left-hand side matrix operand
3542  , typename VT2 // Type of the right-hand side vector operand
3543  , typename ST2 > // Type of the scalar value
3544  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3545  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3546  {
3547  if( IsTriangular<MT1>::value ) {
3548  assign( y, scalar * x );
3549  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3550  }
3551  else {
3552  cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3553  }
3554  }
3555 #endif
3556  //**********************************************************************************************
3557 
3558  //**BLAS-based assignment to dense vectors (double precision complex)***************************
3559 #if BLAZE_BLAS_MODE
3560 
3573  template< typename VT1 // Type of the left-hand side target vector
3574  , typename MT1 // Type of the left-hand side matrix operand
3575  , typename VT2 // Type of the right-hand side vector operand
3576  , typename ST2 > // Type of the scalar value
3577  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3578  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3579  {
3580  if( IsTriangular<MT1>::value ) {
3581  assign( y, scalar * x );
3582  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3583  }
3584  else {
3585  zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3586  }
3587  }
3588 #endif
3589  //**********************************************************************************************
3590 
3591  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix-vector product to a sparse vector.
//
// lhs  target sparse vector
// rhs  the scaled multiplication expression to be assigned
//
// The expression is first evaluated into a temporary of type ResultType via serial( rhs ),
// and that temporary is then assigned to the target.
//
// NOTE(review): the listing numbers skip inside this body (3606 and 3608-3610 are absent),
// so statements may have been elided here - confirm against the original header.
3603  template< typename VT1 > // Type of the target sparse vector
3604  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3605  {
3607 
3611 
3612  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3613 
      // Evaluate into a temporary first, then assign the temporary to the target.
3614  const ResultType tmp( serial( rhs ) );
3615  assign( ~lhs, tmp );
3616  }
3617  //**********************************************************************************************
3618 
3619  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix-vector product to a dense vector.
//
// lhs  target dense vector
// rhs  the scaled multiplication expression to be added
//
// The matrix and vector operands are taken from rhs.vector_, evaluated through serial()
// into objects of type LT and RT, and passed together with rhs.scalar_ to
// selectAddAssignKernel(). If the matrix has no rows or no columns the function returns
// without touching the target.
//
// NOTE(review): listing number 3634 is absent inside this body, so a statement may have
// been elided here - confirm against the original header.
3631  template< typename VT1 > // Type of the target dense vector
3632  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3633  {
3635 
3636  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3637 
3638  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3639  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3640 
      // Nothing is added when the matrix has no rows or no columns.
3641  if( left.rows() == 0UL || left.columns() == 0UL ) {
3642  return;
3643  }
3644 
3645  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3646  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3647 
3648  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3649  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3650  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3651  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3652 
      // Hand the evaluated operands and the scalar to the kernel dispatcher.
3653  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3654  }
3655  //**********************************************************************************************
3656 
3657  //**Addition assignment to dense vectors (kernel selection)*************************************
3668  template< typename VT1 // Type of the left-hand side target vector
3669  , typename MT1 // Type of the left-hand side matrix operand
3670  , typename VT2 // Type of the right-hand side vector operand
3671  , typename ST2 > // Type of the scalar value
3672  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3673  {
3674  if( ( IsDiagonal<MT1>::value ) ||
3675  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3676  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3677  selectSmallAddAssignKernel( y, A, x, scalar );
3678  else
3679  selectBlasAddAssignKernel( y, A, x, scalar );
3680  }
3681  //**********************************************************************************************
3682 
3683  //**Default addition assignment to dense vectors************************************************
// Default addition assignment kernel.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// Builds the expression A * x * scalar and hands it to y.addAssign().
3697  template< typename VT1 // Type of the left-hand side target vector
3698  , typename MT1 // Type of the left-hand side matrix operand
3699  , typename VT2 // Type of the right-hand side vector operand
3700  , typename ST2 > // Type of the scalar value
3701  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3702  {
3703  y.addAssign( A * x * scalar );
3704  }
3705  //**********************************************************************************************
3706 
3707  //**Default addition assignment to dense vectors (small matrices)*******************************
// Small-matrix addition assignment kernel, non-vectorized fallback.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// This overload is selected through DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >
// and only forwards all four arguments unchanged to selectDefaultAddAssignKernel().
3721  template< typename VT1 // Type of the left-hand side target vector
3722  , typename MT1 // Type of the left-hand side matrix operand
3723  , typename VT2 // Type of the right-hand side vector operand
3724  , typename ST2 > // Type of the scalar value
3725  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3726  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3727  {
3728  selectDefaultAddAssignKernel( y, A, x, scalar );
3729  }
3730  //**********************************************************************************************
3731 
3732  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Small-matrix addition assignment kernel, vectorized.
//
// y       target vector; read and updated through y.load()/y.store()
// A       matrix operand, read through A.load(i,j)
// x       vector operand, read element-wise through x[j]
// scalar  scaling factor applied to every row sum
//
// This overload is selected through EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >.
// The rows are processed in strips of 8, 4, 3, 2 and finally 1 times IT::size rows. For
// every strip the products A.load(i,j) * set(x[j]) are summed over the columns
// jbegin..jend-1, multiplied by the scalar and added onto the current contents of y.
//
// NOTE(review): the accumulators are declared without an initializer; this presumably relies
// on IntrinsicType default-constructing to zero - confirm.
3747  template< typename VT1 // Type of the left-hand side target vector
3748  , typename MT1 // Type of the left-hand side matrix operand
3749  , typename VT2 // Type of the right-hand side vector operand
3750  , typename ST2 > // Type of the scalar value
3751  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3752  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3753  {
3754  typedef IntrinsicTrait<ElementType> IT;
3755 
3756  const size_t M( A.rows() );
3757  const size_t N( A.columns() );
3758 
      // The scaling factor converted to an IntrinsicType via set().
3759  const IntrinsicType factor( set( scalar ) );
3760 
3761  size_t i( 0UL );
3762 
      // Strips of 8*IT::size rows.
3763  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
3764  {
      // Upper matrices: the column range starts at i (i+1 if strictly upper), otherwise at 0.
3765  const size_t jbegin( ( IsUpper<MT1>::value )
3766  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3767  :( 0UL ) );
      // Lower matrices: the column range ends at the last row of the strip (capped at N),
      // reduced by one if strictly lower; otherwise it ends at N.
3768  const size_t jend( ( IsLower<MT1>::value )
3769  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3770  :( N ) );
3771  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3772 
3773  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3774 
3775  for( size_t j=jbegin; j<jend; ++j ) {
3776  const IntrinsicType x1( set( x[j] ) );
3777  xmm1 = xmm1 + A.load(i ,j) * x1;
3778  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3779  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3780  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3781  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3782  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3783  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3784  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3785  }
3786 
      // Scale the row sums and add them onto the current contents of y.
3787  y.store( i , y.load(i ) + xmm1*factor );
3788  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3789  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3790  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3791  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3792  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3793  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3794  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3795  }
3796 
      // Strips of 4*IT::size rows; same column-range rules as above.
3797  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
3798  {
3799  const size_t jbegin( ( IsUpper<MT1>::value )
3800  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3801  :( 0UL ) );
3802  const size_t jend( ( IsLower<MT1>::value )
3803  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3804  :( N ) );
3805  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3806 
3807  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3808 
3809  for( size_t j=jbegin; j<jend; ++j ) {
3810  const IntrinsicType x1( set( x[j] ) );
3811  xmm1 = xmm1 + A.load(i ,j) * x1;
3812  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3813  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3814  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3815  }
3816 
3817  y.store( i , y.load(i ) + xmm1*factor );
3818  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3819  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3820  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3821  }
3822 
      // Strips of 3*IT::size rows.
3823  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
3824  {
3825  const size_t jbegin( ( IsUpper<MT1>::value )
3826  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3827  :( 0UL ) );
3828  const size_t jend( ( IsLower<MT1>::value )
3829  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3830  :( N ) );
3831  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3832 
3833  IntrinsicType xmm1, xmm2, xmm3;
3834 
3835  for( size_t j=jbegin; j<jend; ++j ) {
3836  const IntrinsicType x1( set( x[j] ) );
3837  xmm1 = xmm1 + A.load(i ,j) * x1;
3838  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3839  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3840  }
3841 
3842  y.store( i , y.load(i ) + xmm1*factor );
3843  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3844  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3845  }
3846 
      // Strips of 2*IT::size rows.
3847  for( ; (i+IT::size) < M; i+=IT::size*2UL )
3848  {
3849  const size_t jbegin( ( IsUpper<MT1>::value )
3850  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3851  :( 0UL ) );
3852  const size_t jend( ( IsLower<MT1>::value )
3853  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3854  :( N ) );
3855  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3856 
3857  IntrinsicType xmm1, xmm2;
3858 
3859  for( size_t j=jbegin; j<jend; ++j ) {
3860  const IntrinsicType x1( set( x[j] ) );
3861  xmm1 = xmm1 + A.load(i ,j) * x1;
3862  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3863  }
3864 
3865  y.store( i , y.load(i ) + xmm1*factor );
3866  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
3867  }
3868 
      // Remaining rows: one more IT::size-wide load/store at i.
      // NOTE(review): if fewer than IT::size rows remain this touches elements at or beyond
      // M - presumably covered by padding of y and A; confirm.
3869  if( i < M )
3870  {
3871  const size_t jbegin( ( IsUpper<MT1>::value )
3872  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3873  :( 0UL ) );
3874  const size_t jend( ( IsLower<MT1>::value )
3875  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3876  :( N ) );
3877  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3878 
3879  IntrinsicType xmm1;
3880 
3881  for( size_t j=jbegin; j<jend; ++j ) {
3882  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3883  }
3884 
3885  y.store( i, y.load(i) + xmm1*factor );
3886  }
3887  }
3888  //**********************************************************************************************
3889 
3890  //**Default addition assignment to dense vectors (large matrices)*******************************
// Large-matrix addition assignment kernel, non-vectorized fallback.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// This overload is selected through DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >
// and only forwards all four arguments unchanged to selectDefaultAddAssignKernel().
3904  template< typename VT1 // Type of the left-hand side target vector
3905  , typename MT1 // Type of the left-hand side matrix operand
3906  , typename VT2 // Type of the right-hand side vector operand
3907  , typename ST2 > // Type of the scalar value
3908  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3909  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3910  {
3911  selectDefaultAddAssignKernel( y, A, x, scalar );
3912  }
3913  //**********************************************************************************************
3914 
3915  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Large-matrix addition assignment kernel, vectorized and tiled.
//
// y       target vector; read and updated through y.load()/y.store() (it is not reset)
// A       matrix operand, read through A.load(i,j)
// x       vector operand, read element-wise through x[j]
// scalar  scaling factor applied to every partial sum
//
// This overload is selected through EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >.
// The matrix is walked in tiles of iblock rows by jblock columns. Within a tile the rows are
// processed in strips of 8, 4, 3, 2 and finally 1 times IT::size rows. For every strip the
// products A.load(i,j) * set(x[j]) are summed over the tile's columns, multiplied by the
// scalar and added onto the current contents of y.
//
// NOTE(review): the accumulators are declared without an initializer; this presumably relies
// on IntrinsicType default-constructing to zero - confirm.
3930  template< typename VT1 // Type of the left-hand side target vector
3931  , typename MT1 // Type of the left-hand side matrix operand
3932  , typename VT2 // Type of the right-hand side vector operand
3933  , typename ST2 > // Type of the scalar value
3934  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3935  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3936  {
3937  typedef IntrinsicTrait<ElementType> IT;
3938 
3939  const size_t M( A.rows() );
3940  const size_t N( A.columns() );
3941 
      // Row tile height: the number of elements that fit into 32768 bytes.
3942  const size_t iblock( 32768UL / sizeof( ElementType ) );
      // Column tile width: 8 columns when N is smaller than iblock, otherwise 4.
3943  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3944 
      // The scaling factor converted to an IntrinsicType via set().
3945  const IntrinsicType factor( set( scalar ) );
3946 
3947  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
3948 
3949  for( size_t ii=0U; ii<M; ii+=iblock ) {
3950  for( size_t jj=0UL; jj<N; jj+=jblock )
3951  {
3952  const size_t jend( min( jj+jblock, N ) );
3953  const size_t itmp( min( ii+iblock, M ) );
      // For upper matrices the row range of the tile is cut at jend (jend-1 if strictly upper).
3954  const size_t iend( ( IsUpper<MT1>::value )
3955  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3956  :( itmp ) );
3957 
      // For lower matrices the first row is raised to jj (jj+1 if strictly lower), masked with
      // size_t(-IT::size). Presumably this rounds down to a multiple of IT::size, which would
      // require IT::size to be a power of two - TODO confirm.
3958  size_t i( ( IsLower<MT1>::value )
3959  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
3960  :( ii ) );
3961 
      // Strips of 8*IT::size rows: one accumulator per IT::size rows, summed over columns jj..jend-1.
3962  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
3963  {
3964  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3965 
3966  for( size_t j=jj; j<jend; ++j ) {
3967  const IntrinsicType x1( set( x[j] ) );
3968  xmm1 = xmm1 + A.load(i ,j) * x1;
3969  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3970  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3971  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3972  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3973  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3974  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3975  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3976  }
3977 
      // Scale the partial sums and add them onto the current contents of y.
3978  y.store( i , y.load(i ) + xmm1*factor );
3979  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3980  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3981  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3982  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3983  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3984  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3985  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3986  }
3987 
      // Strips of 4*IT::size rows.
3988  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
3989  {
3990  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3991 
3992  for( size_t j=jj; j<jend; ++j ) {
3993  const IntrinsicType x1( set( x[j] ) );
3994  xmm1 = xmm1 + A.load(i ,j) * x1;
3995  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3996  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3997  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3998  }
3999 
4000  y.store( i , y.load(i ) + xmm1*factor );
4001  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
4002  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
4003  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
4004  }
4005 
      // Strips of 3*IT::size rows.
4006  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
4007  {
4008  IntrinsicType xmm1, xmm2, xmm3;
4009 
4010  for( size_t j=jj; j<jend; ++j ) {
4011  const IntrinsicType x1( set( x[j] ) );
4012  xmm1 = xmm1 + A.load(i ,j) * x1;
4013  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4014  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4015  }
4016 
4017  y.store( i , y.load(i ) + xmm1*factor );
4018  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
4019  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
4020  }
4021 
      // Strips of 2*IT::size rows.
4022  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
4023  {
4024  IntrinsicType xmm1, xmm2;
4025 
4026  for( size_t j=jj; j<jend; ++j ) {
4027  const IntrinsicType x1( set( x[j] ) );
4028  xmm1 = xmm1 + A.load(i ,j) * x1;
4029  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
4030  }
4031 
4032  y.store( i , y.load(i ) + xmm1*factor );
4033  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
4034  }
4035 
      // Remaining rows: one more IT::size-wide load/store at i.
      // NOTE(review): if fewer than IT::size rows remain this touches elements at or beyond
      // iend - presumably covered by padding of y and A; confirm.
4036  if( i < iend )
4037  {
4038  IntrinsicType xmm1;
4039 
4040  for( size_t j=jj; j<jend; ++j ) {
4041  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
4042  }
4043 
4044  y.store( i, y.load(i) + xmm1*factor );
4045  }
4046  }
4047  }
4048  }
4049  //**********************************************************************************************
4050 
4051  //**BLAS-based addition assignment to dense vectors (default)***********************************
// BLAS addition assignment kernel, default overload.
//
// y       target vector
// A       matrix operand
// x       vector operand
// scalar  scaling factor
//
// This overload is selected through EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> > and makes
// no BLAS call itself: it forwards all four arguments unchanged to
// selectLargeAddAssignKernel().
4065  template< typename VT1 // Type of the left-hand side target vector
4066  , typename MT1 // Type of the left-hand side matrix operand
4067  , typename VT2 // Type of the right-hand side vector operand
4068  , typename ST2 > // Type of the scalar value
4069  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4070  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4071  {
4072  selectLargeAddAssignKernel( y, A, x, scalar );
4073  }
4074  //**********************************************************************************************
4075 
4076  //**BLAS-based addition assignment to dense vectors (single precision)**************************
4077 #if BLAZE_BLAS_MODE
4078 
4091  template< typename VT1 // Type of the left-hand side target vector
4092  , typename MT1 // Type of the left-hand side matrix operand
4093  , typename VT2 // Type of the right-hand side vector operand
4094  , typename ST2 > // Type of the scalar value
4095  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4096  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4097  {
4098  if( IsTriangular<MT1>::value ) {
4099  typename VT1::ResultType tmp( scalar * x );
4100  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4101  addAssign( y, tmp );
4102  }
4103  else {
4104  sgemv( y, A, x, scalar, 1.0F );
4105  }
4106  }
4107 #endif
4108  //**********************************************************************************************
4109 
4110  //**BLAS-based addition assignment to dense vectors (double precision)**************************
4111 #if BLAZE_BLAS_MODE
4112 
4125  template< typename VT1 // Type of the left-hand side target vector
4126  , typename MT1 // Type of the left-hand side matrix operand
4127  , typename VT2 // Type of the right-hand side vector operand
4128  , typename ST2 > // Type of the scalar value
4129  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4130  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4131  {
4132  if( IsTriangular<MT1>::value ) {
4133  typename VT1::ResultType tmp( scalar * x );
4134  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4135  addAssign( y, tmp );
4136  }
4137  else {
4138  dgemv( y, A, x, scalar, 1.0 );
4139  }
4140  }
4141 #endif
4142  //**********************************************************************************************
4143 
4144  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
4145 #if BLAZE_BLAS_MODE
4146 
4159  template< typename VT1 // Type of the left-hand side target vector
4160  , typename MT1 // Type of the left-hand side matrix operand
4161  , typename VT2 // Type of the right-hand side vector operand
4162  , typename ST2 > // Type of the scalar value
4163  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4164  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4165  {
4166  if( IsTriangular<MT1>::value ) {
4167  typename VT1::ResultType tmp( scalar * x );
4168  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4169  addAssign( y, tmp );
4170  }
4171  else {
4172  cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4173  }
4174  }
4175 #endif
4176  //**********************************************************************************************
4177 
4178  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
4179 #if BLAZE_BLAS_MODE
4180 
4193  template< typename VT1 // Type of the left-hand side target vector
4194  , typename MT1 // Type of the left-hand side matrix operand
4195  , typename VT2 // Type of the right-hand side vector operand
4196  , typename ST2 > // Type of the scalar value
4197  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4198  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4199  {
4200  if( IsTriangular<MT1>::value ) {
4201  typename VT1::ResultType tmp( scalar * x );
4202  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4203  addAssign( y, tmp );
4204  }
4205  else {
4206  zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4207  }
4208  }
4209 #endif
4210  //**********************************************************************************************
4211 
4212  //**Addition assignment to sparse vectors*******************************************************
4213  // No special implementation for the addition assignment to sparse vectors.
4214  //**********************************************************************************************
4215 
4216  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix-vector product to a dense vector.
//
// lhs  target dense vector
// rhs  the scaled multiplication expression to be subtracted
//
// The matrix and vector operands are taken from rhs.vector_, evaluated through serial()
// into objects of type LT and RT, and passed together with rhs.scalar_ to
// selectSubAssignKernel(). If the matrix has no rows or no columns the function returns
// without touching the target.
//
// NOTE(review): listing number 4231 is absent inside this body, so a statement may have
// been elided here - confirm against the original header.
4228  template< typename VT1 > // Type of the target dense vector
4229  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4230  {
4232 
4233  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4234 
4235  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4236  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4237 
      // Nothing is subtracted when the matrix has no rows or no columns.
4238  if( left.rows() == 0UL || left.columns() == 0UL ) {
4239  return;
4240  }
4241 
4242  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4243  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
4244 
4245  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4246  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4247  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4248  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4249 
      // Hand the evaluated operands and the scalar to the kernel dispatcher.
4250  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
4251  }
4252  //**********************************************************************************************
4253 
4254  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment of the scaled product to the dense vector y.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// The BLAS-based kernel is used only if A is not diagonal, it is not the case that the matrix
// type MT of the enclosing class is a computation that is left unevaluated, and the number of
// elements of A is not below TDMATDVECMULT_THRESHOLD; otherwise the small kernel is selected.
4265  template< typename VT1 // Target vector type
4266  , typename MT1 // Matrix operand type
4267  , typename VT2 // Vector operand type
4268  , typename ST2 > // Scalar type
4269  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4270  {
4271  const bool useBlasKernel( !( IsDiagonal<MT1>::value ) &&
4272  !( IsComputation<MT>::value && !evaluateMatrix ) &&
4273  !( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) );
4274  if( useBlasKernel ) selectBlasSubAssignKernel( y, A, x, scalar );
4275  else
4276  selectSmallSubAssignKernel( y, A, x, scalar );
4277  }
4278  //**********************************************************************************************
4279 
4280  //**Default subtraction assignment to dense vectors*********************************************
// Default kernel for the subtraction assignment of the scaled product to the dense vector y.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// The expression A * x * scalar is passed unchanged to the subAssign() member of y.
4294  template< typename VT1 // Type of the left-hand side target vector
4295  , typename MT1 // Type of the left-hand side matrix operand
4296  , typename VT2 // Type of the right-hand side vector operand
4297  , typename ST2 > // Type of the scalar value
4298  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4299  {
4300  y.subAssign( A * x * scalar );
4301  }
4302  //**********************************************************************************************
4303 
4304  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Small-matrix kernel for the subtraction assignment, non-vectorized variant.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not
// hold (DisableIf) and simply forwards to selectDefaultSubAssignKernel().
4318  template< typename VT1 // Type of the left-hand side target vector
4319  , typename MT1 // Type of the left-hand side matrix operand
4320  , typename VT2 // Type of the right-hand side vector operand
4321  , typename ST2 > // Type of the scalar value
4322  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4323  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4324  {
4325  selectDefaultSubAssignKernel( y, A, x, scalar );
4326  }
4327  //**********************************************************************************************
4328 
4329  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Small-matrix kernel for the subtraction assignment, vectorized variant (enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// The rows of A / elements of y are processed in chunks of 8, 4, 3, 2 and finally 1 intrinsic
// vector(s) of IT::size elements each. For every chunk the products A.load(i,j) * x[j] are
// accumulated over the relevant columns j, the accumulators are multiplied by the broadcast
// scalar and the result is subtracted from the corresponding part of y.
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed; this presumably
// relies on IntrinsicType's default constructor producing zero -- confirm.
4344  template< typename VT1 // Type of the left-hand side target vector
4345  , typename MT1 // Type of the left-hand side matrix operand
4346  , typename VT2 // Type of the right-hand side vector operand
4347  , typename ST2 > // Type of the scalar value
4348  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4349  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4350  {
4351  typedef IntrinsicTrait<ElementType> IT;
4352 
4353  const size_t M( A.rows() );
4354  const size_t N( A.columns() );
4355 
      // Scalar broadcast into an intrinsic vector, applied once per chunk after accumulation
4356  const IntrinsicType factor( set( scalar ) );
4357 
4358  size_t i( 0UL );
4359 
      // Chunks of 8 intrinsic vectors (8*IT::size rows)
4360  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL )
4361  {
      // Column range: for upper matrices start at the diagonal (one past it if strictly upper),
      // for lower matrices stop at the end of the chunk (one column earlier if strictly lower)
4362  const size_t jbegin( ( IsUpper<MT1>::value )
4363  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4364  :( 0UL ) );
4365  const size_t jend( ( IsLower<MT1>::value )
4366  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4367  :( N ) );
4368  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4369 
4370  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4371 
4372  for( size_t j=jbegin; j<jend; ++j ) {
4373  const IntrinsicType x1( set( x[j] ) );
4374  xmm1 = xmm1 + A.load(i ,j) * x1;
4375  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4376  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4377  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4378  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
4379  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
4380  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
4381  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
4382  }
4383 
      // y -= accumulated sum * scalar
4384  y.store( i , y.load(i ) - xmm1*factor );
4385  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4386  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4387  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4388  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
4389  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
4390  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
4391  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
4392  }
4393 
      // Chunks of 4 intrinsic vectors
4394  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
4395  {
4396  const size_t jbegin( ( IsUpper<MT1>::value )
4397  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4398  :( 0UL ) );
4399  const size_t jend( ( IsLower<MT1>::value )
4400  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4401  :( N ) );
4402  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4403 
4404  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4405 
4406  for( size_t j=jbegin; j<jend; ++j ) {
4407  const IntrinsicType x1( set( x[j] ) );
4408  xmm1 = xmm1 + A.load(i ,j) * x1;
4409  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4410  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4411  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4412  }
4413 
4414  y.store( i , y.load(i ) - xmm1*factor );
4415  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4416  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4417  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4418  }
4419 
      // Chunks of 3 intrinsic vectors
4420  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL )
4421  {
4422  const size_t jbegin( ( IsUpper<MT1>::value )
4423  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4424  :( 0UL ) );
4425  const size_t jend( ( IsLower<MT1>::value )
4426  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4427  :( N ) );
4428  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4429 
4430  IntrinsicType xmm1, xmm2, xmm3;
4431 
4432  for( size_t j=jbegin; j<jend; ++j ) {
4433  const IntrinsicType x1( set( x[j] ) );
4434  xmm1 = xmm1 + A.load(i ,j) * x1;
4435  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4436  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4437  }
4438 
4439  y.store( i , y.load(i ) - xmm1*factor );
4440  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4441  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4442  }
4443 
      // Chunks of 2 intrinsic vectors
4444  for( ; (i+IT::size) < M; i+=IT::size*2UL )
4445  {
4446  const size_t jbegin( ( IsUpper<MT1>::value )
4447  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4448  :( 0UL ) );
4449  const size_t jend( ( IsLower<MT1>::value )
4450  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4451  :( N ) );
4452  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4453 
4454  IntrinsicType xmm1, xmm2;
4455 
4456  for( size_t j=jbegin; j<jend; ++j ) {
4457  const IntrinsicType x1( set( x[j] ) );
4458  xmm1 = xmm1 + A.load(i ,j) * x1;
4459  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
4460  }
4461 
4462  y.store( i , y.load(i ) - xmm1*factor );
4463  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
4464  }
      // Remaining rows (at most IT::size after the loop above) handled with one intrinsic vector.
      // NOTE(review): a full vector is loaded/stored at i even if fewer than IT::size rows
      // remain; presumably the operands are padded accordingly -- confirm.
4465  if( i < M )
4466  {
4467  const size_t jbegin( ( IsUpper<MT1>::value )
4468  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4469  :( 0UL ) );
4470  const size_t jend( ( IsLower<MT1>::value )
4471  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4472  :( N ) );
4473  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4474 
4475  IntrinsicType xmm1;
4476 
4477  for( size_t j=jbegin; j<jend; ++j ) {
4478  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
4479  }
4480 
4481  y.store( i, y.load(i) - xmm1*factor );
4482  }
4483  }
4484  //**********************************************************************************************
4485 
4486  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Large-matrix kernel for the subtraction assignment, non-vectorized variant.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not
// hold (DisableIf) and simply forwards to selectDefaultSubAssignKernel().
4500  template< typename VT1 // Type of the left-hand side target vector
4501  , typename MT1 // Type of the left-hand side matrix operand
4502  , typename VT2 // Type of the right-hand side vector operand
4503  , typename ST2 > // Type of the scalar value
4504  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4505  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4506  {
4507  selectDefaultSubAssignKernel( y, A, x, scalar );
4508  }
4509  //**********************************************************************************************
4510 
4511  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Large-matrix kernel for the subtraction assignment, vectorized variant (enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// The matrix is traversed in blocks of iblock rows by jblock columns. Within each block the
// rows are processed in chunks of 8, 4, 3, 2 and 1 intrinsic vector(s); every chunk accumulates
// A.load(i,j) * x[j] over the columns of the block, multiplies the sum by the broadcast scalar
// and subtracts it from y, so y receives one partial update per column block.
// NOTE(review): the accumulators are only default-constructed; this presumably relies on
// IntrinsicType's default constructor producing zero -- confirm.
4526  template< typename VT1 // Type of the left-hand side target vector
4527  , typename MT1 // Type of the left-hand side matrix operand
4528  , typename VT2 // Type of the right-hand side vector operand
4529  , typename ST2 > // Type of the scalar value
4530  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4531  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4532  {
4533  typedef IntrinsicTrait<ElementType> IT;
4534 
4535  const size_t M( A.rows() );
4536  const size_t N( A.columns() );
4537 
      // Row block: as many elements as fit into 32768 bytes; column block: 8 columns if the
      // matrix has fewer columns than iblock, 4 otherwise
4538  const size_t iblock( 32768UL / sizeof( ElementType ) );
4539  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4540 
4541  const IntrinsicType factor( set( scalar ) );
4542 
4543  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
4544 
4545  for( size_t ii=0U; ii<M; ii+=iblock ) {
4546  for( size_t jj=0UL; jj<N; jj+=jblock )
4547  {
4548  const size_t jend( min( jj+jblock, N ) );
4549  const size_t itmp( min( ii+iblock, M ) );
      // Upper matrices: rows at or below the end of the column block are skipped
      // (one row more if strictly upper)
4550  const size_t iend( ( IsUpper<MT1>::value )
4551  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
4552  :( itmp ) );
4553 
      // Lower matrices: start at the first row of the column block, masked with
      // size_t(-IT::size) (rounds down to a multiple of IT::size, assuming IT::size is a
      // power of two), but never before the start of the row block
4554  size_t i( ( IsLower<MT1>::value )
4555  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
4556  :( ii ) );
4557 
      // Chunks of 8 intrinsic vectors
4558  for( ; (i+IT::size*7UL) < iend; i+=IT::size*8UL )
4559  {
4560  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4561 
4562  for( size_t j=jj; j<jend; ++j ) {
4563  const IntrinsicType x1( set( x[j] ) );
4564  xmm1 = xmm1 + A.load(i ,j) * x1;
4565  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4566  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4567  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4568  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
4569  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
4570  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
4571  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
4572  }
4573 
4574  y.store( i , y.load(i ) - xmm1*factor );
4575  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4576  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4577  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4578  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
4579  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
4580  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
4581  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
4582  }
4583 
      // Chunks of 4 intrinsic vectors
4584  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
4585  {
4586  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4587 
4588  for( size_t j=jj; j<jend; ++j ) {
4589  const IntrinsicType x1( set( x[j] ) );
4590  xmm1 = xmm1 + A.load(i ,j) * x1;
4591  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4592  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4593  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4594  }
4595 
4596  y.store( i , y.load(i ) - xmm1*factor );
4597  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4598  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4599  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4600  }
4601 
      // Chunks of 3 intrinsic vectors
4602  for( ; (i+IT::size*2UL) < iend; i+=IT::size*3UL )
4603  {
4604  IntrinsicType xmm1, xmm2, xmm3;
4605 
4606  for( size_t j=jj; j<jend; ++j ) {
4607  const IntrinsicType x1( set( x[j] ) );
4608  xmm1 = xmm1 + A.load(i ,j) * x1;
4609  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4610  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4611  }
4612 
4613  y.store( i , y.load(i ) - xmm1*factor );
4614  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4615  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4616  }
4617 
      // Chunks of 2 intrinsic vectors
4618  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
4619  {
4620  IntrinsicType xmm1, xmm2;
4621 
4622  for( size_t j=jj; j<jend; ++j ) {
4623  const IntrinsicType x1( set( x[j] ) );
4624  xmm1 = xmm1 + A.load(i ,j) * x1;
4625  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
4626  }
4627 
4628  y.store( i , y.load(i ) - xmm1*factor );
4629  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
4630  }
4631 
      // Remaining rows of the block handled with a single intrinsic vector
4632  if( i < iend )
4633  {
4634  IntrinsicType xmm1;
4635 
4636  for( size_t j=jj; j<jend; ++j ) {
4637  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
4638  }
4639 
4640  y.store( i, y.load(i) - xmm1*factor );
4641  }
4642  }
4643  }
4644  }
4645  //**********************************************************************************************
4646 
4647  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// BLAS-based kernel for the subtraction assignment, default variant.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// Enabled when UseDefaultKernel<VT1,MT1,VT2,ST2> holds; no BLAS routine is called here, the
// work is forwarded to selectLargeSubAssignKernel().
4661  template< typename VT1 // Type of the left-hand side target vector
4662  , typename MT1 // Type of the left-hand side matrix operand
4663  , typename VT2 // Type of the right-hand side vector operand
4664  , typename ST2 > // Type of the scalar value
4665  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4666  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4667  {
4668  selectLargeSubAssignKernel( y, A, x, scalar );
4669  }
4670  //**********************************************************************************************
4671 
4672  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
4673 #if BLAZE_BLAS_MODE
4674 
// BLAS-based kernel for the subtraction assignment of the scaled product to the dense vector y;
// this overload is enabled through UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// General matrices are forwarded to sgemv() with the factors -scalar and 1.0F; triangular
// matrices go through a scaled temporary that is passed to strmv() and then subtracted from y.
// NOTE(review): presumably sgemv() follows the BLAS convention y = alpha*A*x + beta*y and
// strmv() overwrites its first argument with A times that argument -- confirm against the wrappers.
4687  template< typename VT1 // Target vector type
4688  , typename MT1 // Matrix operand type
4689  , typename VT2 // Vector operand type
4690  , typename ST2 > // Scalar type
4691  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4692  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4693  {
4694  if( !IsTriangular<MT1>::value ) {
4695  sgemv( y, A, x, -scalar, 1.0F );
4696  }
4697  else {
4698  typename VT1::ResultType scaled( scalar * x );
4699  strmv( scaled, A, IsLower<MT1>::value ? CblasLower : CblasUpper );
4700  subAssign( y, scaled );
4701  }
4702  }
4703 #endif
4704  //**********************************************************************************************
4705 
4706  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
4707 #if BLAZE_BLAS_MODE
4708 
// BLAS-based kernel for the subtraction assignment of the scaled product to the dense vector y;
// this overload is enabled through UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// General matrices are forwarded to dgemv() with the factors -scalar and 1.0; triangular
// matrices go through a scaled temporary that is passed to dtrmv() and then subtracted from y.
// NOTE(review): presumably dgemv() follows the BLAS convention y = alpha*A*x + beta*y and
// dtrmv() overwrites its first argument with A times that argument -- confirm against the wrappers.
4721  template< typename VT1 // Target vector type
4722  , typename MT1 // Matrix operand type
4723  , typename VT2 // Vector operand type
4724  , typename ST2 > // Scalar type
4725  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4726  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4727  {
4728  if( !IsTriangular<MT1>::value ) {
4729  dgemv( y, A, x, -scalar, 1.0 );
4730  }
4731  else {
4732  typename VT1::ResultType scaled( scalar * x );
4733  dtrmv( scaled, A, IsLower<MT1>::value ? CblasLower : CblasUpper );
4734  subAssign( y, scaled );
4735  }
4736  }
4737 #endif
4738  //**********************************************************************************************
4739 
4740  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
4741 #if BLAZE_BLAS_MODE
4742 
// BLAS-based kernel for the subtraction assignment of the scaled product to the dense vector y;
// this overload is enabled through UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// General matrices are forwarded to cgemv() with the factors (-scalar,0) and (1,0); triangular
// matrices go through a scaled temporary that is passed to ctrmv() and then subtracted from y.
// NOTE(review): presumably cgemv() follows the BLAS convention y = alpha*A*x + beta*y and
// ctrmv() overwrites its first argument with A times that argument -- confirm against the wrappers.
4755  template< typename VT1 // Target vector type
4756  , typename MT1 // Matrix operand type
4757  , typename VT2 // Vector operand type
4758  , typename ST2 > // Scalar type
4759  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4760  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4761  {
4762  if( !IsTriangular<MT1>::value ) {
4763  cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4764  }
4765  else {
4766  typename VT1::ResultType scaled( scalar * x );
4767  ctrmv( scaled, A, IsLower<MT1>::value ? CblasLower : CblasUpper );
4768  subAssign( y, scaled );
4769  }
4770  }
4771 #endif
4772  //**********************************************************************************************
4773 
4774  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
4775 #if BLAZE_BLAS_MODE
4776 
// BLAS-based kernel for the subtraction assignment of the scaled product to the dense vector y;
// this overload is enabled through UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
//
// General matrices are forwarded to zgemv() with the factors (-scalar,0) and (1,0); triangular
// matrices go through a scaled temporary that is passed to ztrmv() and then subtracted from y.
// NOTE(review): presumably zgemv() follows the BLAS convention y = alpha*A*x + beta*y and
// ztrmv() overwrites its first argument with A times that argument -- confirm against the wrappers.
4789  template< typename VT1 // Target vector type
4790  , typename MT1 // Matrix operand type
4791  , typename VT2 // Vector operand type
4792  , typename ST2 > // Scalar type
4793  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4794  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4795  {
4796  if( !IsTriangular<MT1>::value ) {
4797  zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4798  }
4799  else {
4800  typename VT1::ResultType scaled( scalar * x );
4801  ztrmv( scaled, A, IsLower<MT1>::value ? CblasLower : CblasUpper );
4802  subAssign( y, scaled );
4803  }
4804  }
4805 #endif
4806  //**********************************************************************************************
4807 
4808  //**Subtraction assignment to sparse vectors****************************************************
4809  // No special implementation for the subtraction assignment to sparse vectors.
4810  //**********************************************************************************************
4811 
4812  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled matrix/vector multiplication expression rhs to the
// dense vector lhs.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The scaled multiplication expression to be multiplied.
// \return void
//
// The expression is first evaluated serially into a temporary of type ResultType, which is
// then passed on to multAssign() together with the target vector.
4824  template< typename VT1 > // Type of the target dense vector
4825  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4826  {
4828 
4832 
4833  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4834 
      // Evaluate the complete expression into a temporary before touching the target
4835  const ResultType tmp( serial( rhs ) );
4836  multAssign( ~lhs, tmp );
4837  }
4838  //**********************************************************************************************
4839 
4840  //**Multiplication assignment to sparse vectors*************************************************
4841  // No special implementation for the multiplication assignment to sparse vectors.
4842  //**********************************************************************************************
4843 
4844  //**SMP assignment to dense vectors**************************************************************
// SMP assignment of the scaled matrix/vector multiplication expression rhs to the dense
// vector lhs; this overload is enabled through UseSMPAssign<VT1>.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The scaled multiplication expression; rhs.vector_ provides the matrix and vector
//            operands, rhs.scalar_ the scaling factor.
//
// If the matrix operand has no rows nothing is done; if it has no columns the target vector
// is reset. Otherwise both operands are evaluated (without serial()) and the expression
// A * x * rhs.scalar_ is handed to smpAssign().
4858  template< typename VT1 > // Type of the target dense vector
4859  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4860  smpAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4861  {
4863 
4864  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4865 
4866  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4867  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4868 
      // No rows: nothing to assign
4869  if( left.rows() == 0UL ) {
4870  return;
4871  }
      // Rows but no columns: the target vector is reset
4872  else if( left.columns() == 0UL ) {
4873  reset( ~lhs );
4874  return;
4875  }
4876 
4877  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4878  RT x( right ); // Evaluation of the right-hand side dense vector operand
4879 
4880  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4881  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4882  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4883  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4884 
4885  smpAssign( ~lhs, A * x * rhs.scalar_ );
4886  }
4887  //**********************************************************************************************
4888 
4889  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the scaled matrix/vector multiplication expression rhs to the sparse
// vector lhs; this overload is enabled through UseSMPAssign<VT1>.
//
// \param lhs The target left-hand side sparse vector.
// \param rhs The scaled multiplication expression to be assigned.
//
// The expression is evaluated into a temporary of type ResultType, which is then passed on to
// smpAssign() together with the target vector.
4903  template< typename VT1 > // Type of the target sparse vector
4904  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4905  smpAssign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4906  {
4908 
4912 
4913  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4914 
4915  const ResultType tmp( rhs );
4916  smpAssign( ~lhs, tmp );
4917  }
4918  //**********************************************************************************************
4919 
4920  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the scaled matrix/vector multiplication expression rhs to the
// dense vector lhs; this overload is enabled through UseSMPAssign<VT1>.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The scaled multiplication expression; rhs.vector_ provides the matrix and vector
//            operands, rhs.scalar_ the scaling factor.
//
// Nothing is done if the matrix operand has no rows or no columns. Otherwise both operands
// are evaluated and the expression A * x * rhs.scalar_ is handed to smpAddAssign().
4934  template< typename VT1 > // Type of the target dense vector
4935  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4936  smpAddAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4937  {
4939 
4940  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4941 
4942  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4943  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4944 
      // An empty matrix operand contributes nothing to the target vector
4945  if( left.rows() == 0UL || left.columns() == 0UL ) {
4946  return;
4947  }
4948 
4949  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4950  RT x( right ); // Evaluation of the right-hand side dense vector operand
4951 
4952  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4953  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4954  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4955  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4956 
4957  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
4958  }
4959  //**********************************************************************************************
4960 
4961  //**SMP addition assignment to sparse vectors***************************************************
4962  // No special implementation for the SMP addition assignment to sparse vectors.
4963  //**********************************************************************************************
4964 
4965  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the scaled matrix/vector multiplication expression rhs to the
// dense vector lhs; this overload is enabled through UseSMPAssign<VT1>.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The scaled multiplication expression; rhs.vector_ provides the matrix and vector
//            operands, rhs.scalar_ the scaling factor.
//
// Nothing is done if the matrix operand has no rows or no columns. Otherwise both operands
// are evaluated and the expression A * x * rhs.scalar_ is handed to smpSubAssign().
4979  template< typename VT1 > // Type of the target dense vector
4980  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4981  smpSubAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4982  {
4984 
4985  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4986 
4987  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4988  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4989 
      // An empty matrix operand contributes nothing to the target vector
4990  if( left.rows() == 0UL || left.columns() == 0UL ) {
4991  return;
4992  }
4993 
4994  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4995  RT x( right ); // Evaluation of the right-hand side dense vector operand
4996 
4997  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4998  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4999  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
5000  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
5001 
5002  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
5003  }
5004  //**********************************************************************************************
5005 
5006  //**SMP subtraction assignment to sparse vectors************************************************
5007  // No special implementation for the SMP subtraction assignment to sparse vectors.
5008  //**********************************************************************************************
5009 
5010  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the scaled matrix/vector multiplication expression rhs to
// the dense vector lhs; this overload is enabled through UseSMPAssign<VT1>.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The scaled multiplication expression to be multiplied.
//
// The expression is evaluated into a temporary of type ResultType, which is then passed on to
// smpMultAssign() together with the target vector.
5025  template< typename VT1 > // Type of the target dense vector
5026  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5027  smpMultAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5028  {
5030 
5034 
5035  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5036 
5037  const ResultType tmp( rhs );
5038  smpMultAssign( ~lhs, tmp );
5039  }
5040  //**********************************************************************************************
5041 
5042  //**SMP multiplication assignment to sparse vectors*********************************************
5043  // No special implementation for the SMP multiplication assignment to sparse vectors.
5044  //**********************************************************************************************
5045 
5046  //**Compile time checks*************************************************************************
// NOTE(review): judging by the macro name, this rejects at compile time any instantiation in
// which ST and RightOperand are not the same type -- confirm against the macro definition.
5054  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
5055  //**********************************************************************************************
5056 };
5058 //*************************************************************************************************
5059 
5060 
5061 
5062 
5063 //=================================================================================================
5064 //
5065 // GLOBAL BINARY ARITHMETIC OPERATORS
5066 //
5067 //=================================================================================================
5068 
5069 //*************************************************************************************************
// Multiplication operator for the multiplication of a transpose (column-major) dense matrix
// and a dense column vector.
//
// \param mat The left-hand side dense matrix of the multiplication.
// \param vec The right-hand side dense vector of the multiplication.
// \return A TDMatDVecMultExpr<T1,T2> expression object representing the product.
// \exception std::invalid_argument Matrix and vector sizes do not match.
//
// The operator is disabled (DisableIf) if T1 is itself a matrix/matrix multiplication
// expression. The number of columns of the matrix must equal the size of the vector;
// otherwise a std::invalid_argument exception is thrown.
5100 template< typename T1 // Type of the left-hand side dense matrix
5101  , typename T2 > // Type of the right-hand side dense vector
5102 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
5103  operator*( const DenseMatrix<T1,true>& mat, const DenseVector<T2,false>& vec )
5104 {
5106 
      // The inner dimensions of the product have to agree
5107  if( (~mat).columns() != (~vec).size() )
5108  throw std::invalid_argument( "Matrix and vector sizes do not match" );
5109 
5110  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
5111 }
5112 //*************************************************************************************************
5113 
5114 
5115 
5116 
5117 //=================================================================================================
5118 //
5119 // SIZE SPECIALIZATIONS
5120 //
5121 //=================================================================================================
5122 
5123 //*************************************************************************************************
// Specialization of the Size type trait for TDMatDVecMultExpr: the trait derives from
// Rows<MT>, i.e. the size of the product expression is taken from the Rows trait of the
// matrix operand type.
5125 template< typename MT, typename VT >
5126 struct Size< TDMatDVecMultExpr<MT,VT> >
5127  : public Rows<MT>
5128 {};
5130 //*************************************************************************************************
5131 
5132 
5133 
5134 
5135 //=================================================================================================
5136 //
5137 // EXPRESSION TRAIT SPECIALIZATIONS
5138 //
5139 //=================================================================================================
5140 
5141 //*************************************************************************************************
// Specialization of the SubvectorExprTrait for TDMatDVecMultExpr. The nested type Type is the
// MultExprTrait result for the submatrix expression type of the (const) matrix operand and the
// subvector expression type of the (const) vector operand, both taken with the flag AF.
5143 template< typename MT, typename VT, bool AF >
5144 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT>, AF >
5145 {
5146  public:
5147  //**********************************************************************************************
5148  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type
5149  , typename SubvectorExprTrait<const VT,AF>::Type >::Type Type;
5150  //**********************************************************************************************
5151 };
5153 //*************************************************************************************************
5154 
5155 } // namespace blaze
5156 
5157 #endif
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:8247
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:272
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:121
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:276
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:126
Constraint on the data type.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:119
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:290
Compile time check for low-level access to mutable data. This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:376
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:410
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
Header file for the IsMatMatMultExpr type trait class.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:281
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
Constraints on the storage order of matrix types.
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:124
Header file for the serial shim.
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:312
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:326
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:386
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:275
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:122
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:150
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:398
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:120
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:287
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:284
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:274
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:366
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:273
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold. This threshold specifies when a c...
Definition: Thresholds.h:345
Header file for the HasMutableDataAccess type trait.
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:123
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Header file for all intrinsic functionality.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:430
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the IsUpper type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:440
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:441
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:278
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:277
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:420
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849