All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
48 #include <blaze/system/BLAS.h>
50 #include <blaze/util/Assert.h>
51 #include <blaze/util/Complex.h>
56 #include <blaze/util/DisableIf.h>
57 #include <blaze/util/EnableIf.h>
58 #include <blaze/util/SelectType.h>
59 #include <blaze/util/Types.h>
65 
66 
67 namespace blaze {
68 
69 //=================================================================================================
70 //
71 // CLASS TDMATDVECMULTEXPR
72 //
73 //=================================================================================================
74 
75 //*************************************************************************************************
82 template< typename MT // Type of the left-hand side dense matrix
83  , typename VT > // Type of the right-hand side dense vector
84 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
85  , private Expression
86  , private Computation
87 {
88  private:
89  //**Type definitions****************************************************************************
90  typedef typename MT::ResultType MRT;
91  typedef typename VT::ResultType VRT;
92  typedef typename MRT::ElementType MET;
93  typedef typename VRT::ElementType VET;
94  typedef typename MT::CompositeType MCT;
95  typedef typename VT::CompositeType VCT;
96  //**********************************************************************************************
97 
98  //**********************************************************************************************
100  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
102  //**********************************************************************************************
103 
104  //**********************************************************************************************
106 
107 
110  template< typename T1, typename T2, typename T3 >
111  struct UseSinglePrecisionKernel {
115  };
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
121 
122 
125  template< typename T1, typename T2, typename T3 >
126  struct UseDoublePrecisionKernel {
127  enum { value = IsDouble<typename T1::ElementType>::value &&
128  IsDouble<typename T2::ElementType>::value &&
129  IsDouble<typename T3::ElementType>::value };
130  };
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
136 
137 
140  template< typename T1, typename T2, typename T3 >
141  struct UseSinglePrecisionComplexKernel {
142  typedef complex<float> Type;
143  enum { value = IsSame<typename T1::ElementType,Type>::value &&
144  IsSame<typename T2::ElementType,Type>::value &&
145  IsSame<typename T3::ElementType,Type>::value };
146  };
148  //**********************************************************************************************
149 
150  //**********************************************************************************************
152 
153 
156  template< typename T1, typename T2, typename T3 >
157  struct UseDoublePrecisionComplexKernel {
158  typedef complex<double> Type;
159  enum { value = IsSame<typename T1::ElementType,Type>::value &&
160  IsSame<typename T2::ElementType,Type>::value &&
161  IsSame<typename T3::ElementType,Type>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
169 
171  template< typename T1, typename T2, typename T3 >
172  struct UseDefaultKernel {
173  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
174  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
175  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
176  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
177  };
179  //**********************************************************************************************
180 
181  //**********************************************************************************************
183 
184 
187  template< typename T1, typename T2, typename T3 >
188  struct UseVectorizedDefaultKernel {
189  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
190  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
191  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
192  IntrinsicTrait<typename T1::ElementType>::addition &&
193  IntrinsicTrait<typename T1::ElementType>::multiplication };
194  };
196  //**********************************************************************************************
197 
198  public:
199  //**Type definitions****************************************************************************
202  typedef typename ResultType::TransposeType TransposeType;
203  typedef typename ResultType::ElementType ElementType;
205  typedef const ElementType ReturnType;
206  typedef const ResultType CompositeType;
207 
209  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
210 
212  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
213 
216 
218  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
219  //**********************************************************************************************
220 
221  //**Compilation flags***************************************************************************
223  enum { vectorizable = 0 };
224 
226  enum { canAlias = ( !evaluate && IsComputation<MT>::value &&
229  //**********************************************************************************************
230 
231  //**Constructor*********************************************************************************
237  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
238  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
239  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
240  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
241  {
242  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
243  }
244  //**********************************************************************************************
245 
246  //**Subscript operator**************************************************************************
252  inline ReturnType operator[]( size_t index ) const {
253  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
254 
255  ElementType res;
256 
257  if( mat_.columns() != 0UL ) {
258  res = mat_(index,0UL) * vec_[0UL];
259  for( size_t j=1UL; j<end_; j+=2UL ) {
260  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
261  }
262  if( end_ < mat_.columns() ) {
263  res += mat_(index,end_) * vec_[end_];
264  }
265  }
266  else {
267  reset( res );
268  }
269 
270  return res;
271  }
272  //**********************************************************************************************
273 
274  //**Size function*******************************************************************************
279  inline size_t size() const {
280  return mat_.rows();
281  }
282  //**********************************************************************************************
283 
284  //**Left function*******************************************************************************
289  inline LeftOperand leftOperand() const {
290  return mat_;
291  }
292  //**********************************************************************************************
293 
294  //**Right function******************************************************************************
299  inline RightOperand rightOperand() const {
300  return vec_;
301  }
302  //**********************************************************************************************
303 
304  //**********************************************************************************************
310  template< typename T >
311  inline bool isAliased( const T* alias ) const {
312  return ( !evaluate && IsComputation<MT>::value && !RequiresEvaluation<MT>::value &&
313  CanAlias<MT>::value && mat_.isAliased( alias ) ) ||
314  ( !IsComputation<VT>::value && vec_.isAliased( alias ) );
315  }
316  //**********************************************************************************************
317 
318  private:
319  //**Member variables****************************************************************************
322  const size_t end_;
323  //**********************************************************************************************
324 
325  //**Assignment to dense vectors*****************************************************************
337  template< typename VT1 > // Type of the target dense vector
338  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
339  {
340  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
341 
342  if( rhs.mat_.rows() == 0UL ) {
343  return;
344  }
345  else if( rhs.mat_.columns() == 0UL ) {
346  reset( ~lhs );
347  return;
348  }
349 
350  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
351  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
352 
353  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
354  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
355  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
356  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
357 
358  if( ( IsComputation<MT>::value && !evaluate ) ||
359  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
360  TDMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
361  else
362  TDMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
363  }
365  //**********************************************************************************************
366 
367  //**Default assignment to dense vectors*********************************************************
381  template< typename VT1 // Type of the left-hand side target vector
382  , typename MT1 // Type of the left-hand side matrix operand
383  , typename VT2 > // Type of the right-hand side vector operand
384  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
385  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
386  {
387  const size_t M( A.rows() );
388  const size_t N( A.columns() );
389 
390  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
391  const size_t iend( M & size_t(-2) );
392 
393  for( size_t i=0UL; i<M; ++i ) {
394  y[i] = x[0UL] * A(i,0UL);
395  }
396  for( size_t j=1UL; j<N; ++j ) {
397  for( size_t i=0UL; i<iend; i+=2UL ) {
398  y[i ] += x[j] * A(i ,j);
399  y[i+1UL] += x[j] * A(i+1UL,j);
400  }
401  if( iend < M ) {
402  y[iend] += x[j] * A(iend,j);
403  }
404  }
405  }
407  //**********************************************************************************************
408 
409  //**Vectorized default assignment to dense vectors**********************************************
423  template< typename VT1 // Type of the left-hand side target vector
424  , typename MT1 // Type of the left-hand side matrix operand
425  , typename VT2 > // Type of the right-hand side vector operand
426  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
427  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
428  {
429  typedef IntrinsicTrait<ElementType> IT;
430 
431  const size_t M( A.spacing() );
432  const size_t N( A.columns() );
433 
434  size_t i( 0UL );
435 
436  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
437  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
438  for( size_t j=0UL; j<N; ++j ) {
439  const IntrinsicType x1( set( x[j] ) );
440  xmm1 = xmm1 + A.get(i ,j) * x1;
441  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
442  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
443  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
444  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
445  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
446  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
447  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
448  }
449  store( &y[i ], xmm1 );
450  store( &y[i+IT::size ], xmm2 );
451  store( &y[i+IT::size*2UL], xmm3 );
452  store( &y[i+IT::size*3UL], xmm4 );
453  store( &y[i+IT::size*4UL], xmm5 );
454  store( &y[i+IT::size*5UL], xmm6 );
455  store( &y[i+IT::size*6UL], xmm7 );
456  store( &y[i+IT::size*7UL], xmm8 );
457  }
458  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
459  IntrinsicType xmm1, xmm2, xmm3, xmm4;
460  for( size_t j=0UL; j<N; ++j ) {
461  const IntrinsicType x1( set( x[j] ) );
462  xmm1 = xmm1 + A.get(i ,j) * x1;
463  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
464  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
465  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
466  }
467  store( &y[i ], xmm1 );
468  store( &y[i+IT::size ], xmm2 );
469  store( &y[i+IT::size*2UL], xmm3 );
470  store( &y[i+IT::size*3UL], xmm4 );
471  }
472  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
473  IntrinsicType xmm1, xmm2, xmm3;
474  for( size_t j=0UL; j<N; ++j ) {
475  const IntrinsicType x1( set( x[j] ) );
476  xmm1 = xmm1 + A.get(i ,j) * x1;
477  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
478  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
479  }
480  store( &y[i ], xmm1 );
481  store( &y[i+IT::size ], xmm2 );
482  store( &y[i+IT::size*2UL], xmm3 );
483  }
484  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
485  IntrinsicType xmm1, xmm2;
486  for( size_t j=0UL; j<N; ++j ) {
487  const IntrinsicType x1( set( x[j] ) );
488  xmm1 = xmm1 + A.get(i ,j) * x1;
489  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
490  }
491  store( &y[i ], xmm1 );
492  store( &y[i+IT::size], xmm2 );
493  }
494  if( i < M ) {
495  IntrinsicType xmm1;
496  for( size_t j=0UL; j<N; ++j ) {
497  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
498  }
499  store( &y[i], xmm1 );
500  }
501  }
503  //**********************************************************************************************
504 
505  //**BLAS-based assignment to dense vectors (default)********************************************
519  template< typename VT1 // Type of the left-hand side target vector
520  , typename MT1 // Type of the left-hand side matrix operand
521  , typename VT2 > // Type of the right-hand side vector operand
522  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
523  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
524  {
525  selectDefaultAssignKernel( y, A, x );
526  }
528  //**********************************************************************************************
529 
530  //**BLAS-based assignment to dense vectors (single precision)***********************************
531 #if BLAZE_BLAS_MODE
532 
545  template< typename VT1 // Type of the left-hand side target vector
546  , typename MT1 // Type of the left-hand side matrix operand
547  , typename VT2 > // Type of the right-hand side vector operand
548  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
549  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
550  {
551  using boost::numeric_cast;
552 
553  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
554  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
555  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
556 
557  const int M ( numeric_cast<int>( A.rows() ) );
558  const int N ( numeric_cast<int>( A.columns() ) );
559  const int lda( numeric_cast<int>( A.spacing() ) );
560 
561  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
562  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
563  }
565 #endif
566  //**********************************************************************************************
567 
568  //**BLAS-based assignment to dense vectors (double precision)***********************************
569 #if BLAZE_BLAS_MODE
570 
583  template< typename VT1 // Type of the left-hand side target vector
584  , typename MT1 // Type of the left-hand side matrix operand
585  , typename VT2 > // Type of the right-hand side vector operand
586  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
587  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
588  {
589  using boost::numeric_cast;
590 
591  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
592  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
593  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
594 
595  const int M ( numeric_cast<int>( A.rows() ) );
596  const int N ( numeric_cast<int>( A.columns() ) );
597  const int lda( numeric_cast<int>( A.spacing() ) );
598 
599  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
600  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
601  }
603 #endif
604  //**********************************************************************************************
605 
606  //**BLAS-based assignment to dense vectors (single precision complex)***************************
607 #if BLAZE_BLAS_MODE
608 
621  template< typename VT1 // Type of the left-hand side target vector
622  , typename MT1 // Type of the left-hand side matrix operand
623  , typename VT2 > // Type of the right-hand side vector operand
624  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
625  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
626  {
627  using boost::numeric_cast;
628 
629  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
630  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
631  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
632  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
633  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
634  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
635 
636  const int M ( numeric_cast<int>( A.rows() ) );
637  const int N ( numeric_cast<int>( A.columns() ) );
638  const int lda( numeric_cast<int>( A.spacing() ) );
639  const complex<float> alpha( 1.0F, 0.0F );
640  const complex<float> beta ( 0.0F, 0.0F );
641 
642  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
643  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
644  }
646 #endif
647  //**********************************************************************************************
648 
649  //**BLAS-based assignment to dense vectors (double precision complex)***************************
650 #if BLAZE_BLAS_MODE
651 
664  template< typename VT1 // Type of the left-hand side target vector
665  , typename MT1 // Type of the left-hand side matrix operand
666  , typename VT2 > // Type of the right-hand side vector operand
667  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
668  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
669  {
670  using boost::numeric_cast;
671 
672  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
673  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
674  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
675  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
676  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
677  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
678 
679  const int M ( numeric_cast<int>( A.rows() ) );
680  const int N ( numeric_cast<int>( A.columns() ) );
681  const int lda( numeric_cast<int>( A.spacing() ) );
682  const complex<double> alpha( 1.0, 0.0 );
683  const complex<double> beta ( 0.0, 0.0 );
684 
685  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
686  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
687  }
689 #endif
690  //**********************************************************************************************
691 
692  //**Assignment to sparse vectors****************************************************************
704  template< typename VT1 > // Type of the target sparse vector
705  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
706  {
709  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
710 
711  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
712 
713  const ResultType tmp( rhs );
714  assign( ~lhs, tmp );
715  }
717  //**********************************************************************************************
718 
719  //**Addition assignment to dense vectors********************************************************
732  template< typename VT1 > // Type of the target dense vector
733  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
734  {
735  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
736 
737  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
738  return;
739  }
740 
741  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
742  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
743 
744  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
745  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
746  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
747  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
748 
749  if( ( IsComputation<MT>::value && !evaluate ) ||
750  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
751  TDMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
752  else
753  TDMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
754  }
756  //**********************************************************************************************
757 
758  //**Default addition assignment to dense vectors************************************************
772  template< typename VT1 // Type of the left-hand side target vector
773  , typename MT1 // Type of the left-hand side matrix operand
774  , typename VT2 > // Type of the right-hand side vector operand
775  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
776  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
777  {
778  const size_t M( A.rows() );
779  const size_t N( A.columns() );
780 
781  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
782  const size_t iend( M & size_t(-2) );
783 
784  for( size_t j=0UL; j<N; ++j ) {
785  for( size_t i=0UL; i<iend; i+=2UL ) {
786  y[i ] += x[j] * A(i ,j);
787  y[i+1UL] += x[j] * A(i+1UL,j);
788  }
789  if( iend < M ) {
790  y[iend] += x[j] * A(iend,j);
791  }
792  }
793  }
795  //**********************************************************************************************
796 
797  //**Vectorized default addition assignment to dense vectors*************************************
811  template< typename VT1 // Type of the left-hand side target vector
812  , typename MT1 // Type of the left-hand side matrix operand
813  , typename VT2 > // Type of the right-hand side vector operand
814  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
815  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
816  {
817  typedef IntrinsicTrait<ElementType> IT;
818 
819  const size_t M( A.spacing() );
820  const size_t N( A.columns() );
821 
822  size_t i( 0UL );
823 
824  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
825  IntrinsicType xmm1( load( &y[i ] ) );
826  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
827  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
828  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
829  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
830  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
831  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
832  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
833  for( size_t j=0UL; j<N; ++j ) {
834  const IntrinsicType x1( set( x[j] ) );
835  xmm1 = xmm1 + A.get(i ,j) * x1;
836  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
837  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
838  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
839  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
840  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
841  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
842  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
843  }
844  store( &y[i ], xmm1 );
845  store( &y[i+IT::size ], xmm2 );
846  store( &y[i+IT::size*2UL], xmm3 );
847  store( &y[i+IT::size*3UL], xmm4 );
848  store( &y[i+IT::size*4UL], xmm5 );
849  store( &y[i+IT::size*5UL], xmm6 );
850  store( &y[i+IT::size*6UL], xmm7 );
851  store( &y[i+IT::size*7UL], xmm8 );
852  }
853  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
854  IntrinsicType xmm1( load( &y[i ] ) );
855  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
856  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
857  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
858  for( size_t j=0UL; j<N; ++j ) {
859  const IntrinsicType x1( set( x[j] ) );
860  xmm1 = xmm1 + A.get(i ,j) * x1;
861  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
862  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
863  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
864  }
865  store( &y[i ], xmm1 );
866  store( &y[i+IT::size ], xmm2 );
867  store( &y[i+IT::size*2UL], xmm3 );
868  store( &y[i+IT::size*3UL], xmm4 );
869  }
870  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
871  IntrinsicType xmm1( load( &y[i ] ) );
872  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
873  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
874  for( size_t j=0UL; j<N; ++j ) {
875  const IntrinsicType x1( set( x[j] ) );
876  xmm1 = xmm1 + A.get(i ,j) * x1;
877  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
878  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
879  }
880  store( &y[i ], xmm1 );
881  store( &y[i+IT::size ], xmm2 );
882  store( &y[i+IT::size*2UL], xmm3 );
883  }
884  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
885  IntrinsicType xmm1( load( &y[i ] ) );
886  IntrinsicType xmm2( load( &y[i+IT::size] ) );
887  for( size_t j=0UL; j<N; ++j ) {
888  const IntrinsicType x1( set( x[j] ) );
889  xmm1 = xmm1 + A.get(i ,j) * x1;
890  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
891  }
892  store( &y[i ], xmm1 );
893  store( &y[i+IT::size], xmm2 );
894  }
895  if( i < M ) {
896  IntrinsicType xmm1( load( &y[i] ) );
897  for( size_t j=0UL; j<N; ++j ) {
898  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
899  }
900  store( &y[i], xmm1 );
901  }
902  }
904  //**********************************************************************************************
905 
906  //**BLAS-based addition assignment to dense vectors (default)***********************************
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename MT1 // Type of the left-hand side matrix operand
922  , typename VT2 > // Type of the right-hand side vector operand
923  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
924  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
925  {
926  selectDefaultAddAssignKernel( y, A, x );
927  }
929  //**********************************************************************************************
930 
931  //**BLAS-based addition assignment to dense vectors (single precision)**************************
932 #if BLAZE_BLAS_MODE
933 
946  template< typename VT1 // Type of the left-hand side target vector
947  , typename MT1 // Type of the left-hand side matrix operand
948  , typename VT2 > // Type of the right-hand side vector operand
949  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
950  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
951  {
952  using boost::numeric_cast;
953 
954  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
955  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
956  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
957 
958  const int M ( numeric_cast<int>( A.rows() ) );
959  const int N ( numeric_cast<int>( A.columns() ) );
960  const int lda( numeric_cast<int>( A.spacing() ) );
961 
962  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
963  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
964  }
966 #endif
967  //**********************************************************************************************
968 
969  //**BLAS-based addition assignment to dense vectors (double precision)**************************
970 #if BLAZE_BLAS_MODE
971 
984  template< typename VT1 // Type of the left-hand side target vector
985  , typename MT1 // Type of the left-hand side matrix operand
986  , typename VT2 > // Type of the right-hand side vector operand
987  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
988  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
989  {
990  using boost::numeric_cast;
991 
992  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
993  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
994  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
995 
996  const int M ( numeric_cast<int>( A.rows() ) );
997  const int N ( numeric_cast<int>( A.columns() ) );
998  const int lda( numeric_cast<int>( A.spacing() ) );
999 
1000  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1001  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1002  }
1004 #endif
1005  //**********************************************************************************************
1006 
1007  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1008 #if BLAZE_BLAS_MODE
1009 
1022  template< typename VT1 // Type of the left-hand side target vector
1023  , typename MT1 // Type of the left-hand side matrix operand
1024  , typename VT2 > // Type of the right-hand side vector operand
1025  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1026  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1027  {
1028  using boost::numeric_cast;
1029 
1030  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1031  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1032  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1033  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1034  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1035  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1036 
1037  const int M ( numeric_cast<int>( A.rows() ) );
1038  const int N ( numeric_cast<int>( A.columns() ) );
1039  const int lda( numeric_cast<int>( A.spacing() ) );
1040  const complex<float> alpha( 1.0F, 0.0F );
1041  const complex<float> beta ( 1.0F, 0.0F );
1042 
1043  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1044  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1045  }
1047 #endif
1048  //**********************************************************************************************
1049 
1050  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1051 #if BLAZE_BLAS_MODE
1052 
1065  template< typename VT1 // Type of the left-hand side target vector
1066  , typename MT1 // Type of the left-hand side matrix operand
1067  , typename VT2 > // Type of the right-hand side vector operand
1068  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1069  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1070  {
1071  using boost::numeric_cast;
1072 
1073  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1074  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1075  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1076  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1077  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1078  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1079 
1080  const int M ( numeric_cast<int>( A.rows() ) );
1081  const int N ( numeric_cast<int>( A.columns() ) );
1082  const int lda( numeric_cast<int>( A.spacing() ) );
1083  const complex<double> alpha( 1.0, 0.0 );
1084  const complex<double> beta ( 1.0, 0.0 );
1085 
1086  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1087  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1088  }
1090 #endif
1091  //**********************************************************************************************
1092 
1093  //**Addition assignment to sparse vectors*******************************************************
1094  // No special implementation for the addition assignment to sparse vectors.
1095  //**********************************************************************************************
1096 
1097  //**Subtraction assignment to dense vectors*****************************************************
1110  template< typename VT1 > // Type of the target dense vector
1111  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1112  {
1113  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1114 
1115  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1116  return;
1117  }
1118 
1119  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1120  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1121 
1122  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1123  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1124  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1125  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1126 
1127  if( ( IsComputation<MT>::value && !evaluate ) ||
1128  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1129  TDMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1130  else
1131  TDMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1132  }
1134  //**********************************************************************************************
1135 
1136  //**Default subtraction assignment to dense vectors*********************************************
1150  template< typename VT1 // Type of the left-hand side target vector
1151  , typename MT1 // Type of the left-hand side matrix operand
1152  , typename VT2 > // Type of the right-hand side vector operand
1153  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1154  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1155  {
1156  const size_t M( A.rows() );
1157  const size_t N( A.columns() );
1158 
1159  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1160  const size_t iend( M & size_t(-2) );
1161 
1162  for( size_t j=0UL; j<N; ++j ) {
1163  for( size_t i=0UL; i<iend; i+=2UL ) {
1164  y[i ] -= x[j] * A(i ,j);
1165  y[i+1UL] -= x[j] * A(i+1UL,j);
1166  }
1167  if( iend < M ) {
1168  y[iend] -= x[j] * A(iend,j);
1169  }
1170  }
1171  }
1173  //**********************************************************************************************
1174 
1175  //**Vectorized default subtraction assignment to dense vectors**********************************
1189  template< typename VT1 // Type of the left-hand side target vector
1190  , typename MT1 // Type of the left-hand side matrix operand
1191  , typename VT2 > // Type of the right-hand side vector operand
1192  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1193  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1194  {
1195  typedef IntrinsicTrait<ElementType> IT;
1196 
1197  const size_t M( A.spacing() );
1198  const size_t N( A.columns() );
1199 
1200  size_t i( 0UL );
1201 
1202  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1203  IntrinsicType xmm1( load( &y[i ] ) );
1204  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1205  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1206  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1207  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
1208  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
1209  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
1210  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
1211  for( size_t j=0UL; j<N; ++j ) {
1212  const IntrinsicType x1( set( x[j] ) );
1213  xmm1 = xmm1 - A.get(i ,j) * x1;
1214  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1215  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1216  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1217  xmm5 = xmm5 - A.get(i+IT::size*4UL,j) * x1;
1218  xmm6 = xmm6 - A.get(i+IT::size*5UL,j) * x1;
1219  xmm7 = xmm7 - A.get(i+IT::size*6UL,j) * x1;
1220  xmm8 = xmm8 - A.get(i+IT::size*7UL,j) * x1;
1221  }
1222  store( &y[i ], xmm1 );
1223  store( &y[i+IT::size ], xmm2 );
1224  store( &y[i+IT::size*2UL], xmm3 );
1225  store( &y[i+IT::size*3UL], xmm4 );
1226  store( &y[i+IT::size*4UL], xmm5 );
1227  store( &y[i+IT::size*5UL], xmm6 );
1228  store( &y[i+IT::size*6UL], xmm7 );
1229  store( &y[i+IT::size*7UL], xmm8 );
1230  }
1231  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1232  IntrinsicType xmm1( load( &y[i ] ) );
1233  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1234  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1235  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1236  for( size_t j=0UL; j<N; ++j ) {
1237  const IntrinsicType x1( set( x[j] ) );
1238  xmm1 = xmm1 - A.get(i ,j) * x1;
1239  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1240  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1241  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1242  }
1243  store( &y[i ], xmm1 );
1244  store( &y[i+IT::size ], xmm2 );
1245  store( &y[i+IT::size*2UL], xmm3 );
1246  store( &y[i+IT::size*3UL], xmm4 );
1247  }
1248  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
1249  IntrinsicType xmm1( load( &y[i ] ) );
1250  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1251  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1252  for( size_t j=0UL; j<N; ++j ) {
1253  const IntrinsicType x1( set( x[j] ) );
1254  xmm1 = xmm1 - A.get(i ,j) * x1;
1255  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1256  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1257  }
1258  store( &y[i ], xmm1 );
1259  store( &y[i+IT::size ], xmm2 );
1260  store( &y[i+IT::size*2UL], xmm3 );
1261  }
1262  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1263  IntrinsicType xmm1( load( &y[i ] ) );
1264  IntrinsicType xmm2( load( &y[i+IT::size] ) );
1265  for( size_t j=0UL; j<N; ++j ) {
1266  const IntrinsicType x1( set( x[j] ) );
1267  xmm1 = xmm1 - A.get(i ,j) * x1;
1268  xmm2 = xmm2 - A.get(i+IT::size,j) * x1;
1269  }
1270  store( &y[i ], xmm1 );
1271  store( &y[i+IT::size], xmm2 );
1272  }
1273  if( i < M ) {
1274  IntrinsicType xmm1( load( &y[i] ) );
1275  for( size_t j=0UL; j<N; ++j ) {
1276  xmm1 = xmm1 - A.get(i,j) * set( x[j] );
1277  }
1278  store( &y[i], xmm1 );
1279  }
1280  }
1282  //**********************************************************************************************
1283 
1284  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1298  template< typename VT1 // Type of the left-hand side target vector
1299  , typename MT1 // Type of the left-hand side matrix operand
1300  , typename VT2 > // Type of the right-hand side vector operand
1301  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1302  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1303  {
1304  selectDefaultSubAssignKernel( y, A, x );
1305  }
1307  //**********************************************************************************************
1308 
1309  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1310 #if BLAZE_BLAS_MODE
1311 
1324  template< typename VT1 // Type of the left-hand side target vector
1325  , typename MT1 // Type of the left-hand side matrix operand
1326  , typename VT2 > // Type of the right-hand side vector operand
1327  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1328  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1329  {
1330  using boost::numeric_cast;
1331 
1332  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1333  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1334  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1335 
1336  const int M ( numeric_cast<int>( A.rows() ) );
1337  const int N ( numeric_cast<int>( A.columns() ) );
1338  const int lda( numeric_cast<int>( A.spacing() ) );
1339 
1340  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1341  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1342  }
1344 #endif
1345  //**********************************************************************************************
1346 
1347  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1348 #if BLAZE_BLAS_MODE
1349 
1362  template< typename VT1 // Type of the left-hand side target vector
1363  , typename MT1 // Type of the left-hand side matrix operand
1364  , typename VT2 > // Type of the right-hand side vector operand
1365  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1366  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1367  {
1368  using boost::numeric_cast;
1369 
1370  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1371  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1372  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1373 
1374  const int M ( numeric_cast<int>( A.rows() ) );
1375  const int N ( numeric_cast<int>( A.columns() ) );
1376  const int lda( numeric_cast<int>( A.spacing() ) );
1377 
1378  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1379  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1380  }
1382 #endif
1383  //**********************************************************************************************
1384 
1385  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1386 #if BLAZE_BLAS_MODE
1387 
1400  template< typename VT1 // Type of the left-hand side target vector
1401  , typename MT1 // Type of the left-hand side matrix operand
1402  , typename VT2 > // Type of the right-hand side vector operand
1403  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1404  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1405  {
1406  using boost::numeric_cast;
1407 
1408  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1409  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1410  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1411  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1412  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1413  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1414 
1415  const int M ( numeric_cast<int>( A.rows() ) );
1416  const int N ( numeric_cast<int>( A.columns() ) );
1417  const int lda( numeric_cast<int>( A.spacing() ) );
1418  const complex<float> alpha( -1.0F, 0.0F );
1419  const complex<float> beta ( 1.0F, 0.0F );
1420 
1421  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1422  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1423  }
1425 #endif
1426  //**********************************************************************************************
1427 
1428  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1429 #if BLAZE_BLAS_MODE
1430 
1443  template< typename VT1 // Type of the left-hand side target vector
1444  , typename MT1 // Type of the left-hand side matrix operand
1445  , typename VT2 > // Type of the right-hand side vector operand
1446  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1447  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1448  {
1449  using boost::numeric_cast;
1450 
1451  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1452  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1453  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1454  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1455  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1456  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1457 
1458  const int M ( numeric_cast<int>( A.rows() ) );
1459  const int N ( numeric_cast<int>( A.columns() ) );
1460  const int lda( numeric_cast<int>( A.spacing() ) );
1461  const complex<double> alpha( -1.0, 0.0 );
1462  const complex<double> beta ( 1.0, 0.0 );
1463 
1464  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1465  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1466  }
1468 #endif
1469  //**********************************************************************************************
1470 
1471  //**Subtraction assignment to sparse vectors****************************************************
1472  // No special implementation for the subtraction assignment to sparse vectors.
1473  //**********************************************************************************************
1474 
1475  //**Multiplication assignment to dense vectors**************************************************
1488  template< typename VT1 > // Type of the target dense vector
1489  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1490  {
1493  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1494 
1495  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1496 
1497  const ResultType tmp( rhs );
1498  multAssign( ~lhs, tmp );
1499  }
1501  //**********************************************************************************************
1502 
1503  //**Multiplication assignment to sparse vectors*************************************************
1504  // No special implementation for the multiplication assignment to sparse vectors.
1505  //**********************************************************************************************
1506 
1507  //**Compile time checks*************************************************************************
1514  //**********************************************************************************************
1515 };
1516 //*************************************************************************************************
1517 
1518 
1519 
1520 
1521 //=================================================================================================
1522 //
1523 // DVECSCALARMULTEXPR SPECIALIZATION
1524 //
1525 //=================================================================================================
1526 
1527 //*************************************************************************************************
1536 template< typename MT // Type of the left-hand side dense matrix
1537  , typename VT // Type of the right-hand side dense vector
1538  , typename ST > // Type of the side scalar value
1539 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1540  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1541  , private Expression
1542  , private Computation
1543 {
1544  private:
1545  //**Type definitions****************************************************************************
1546  typedef TDMatDVecMultExpr<MT,VT> MVM;
1547  typedef typename MVM::ResultType RES;
1548  typedef typename MT::ResultType MRT;
1549  typedef typename VT::ResultType VRT;
1550  typedef typename MRT::ElementType MET;
1551  typedef typename VRT::ElementType VET;
1552  typedef typename MT::CompositeType MCT;
1553  typedef typename VT::CompositeType VCT;
1554  //**********************************************************************************************
1555 
1556  //**********************************************************************************************
1558  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1559  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1560  //**********************************************************************************************
1561 
1562  //**********************************************************************************************
1564 
1567  template< typename T1, typename T2, typename T3, typename T4 >
1568  struct UseSinglePrecisionKernel {
1569  enum { value = IsFloat<typename T1::ElementType>::value &&
1570  IsFloat<typename T2::ElementType>::value &&
1571  IsFloat<typename T3::ElementType>::value &&
1572  !IsComplex<T4>::value };
1573  };
1574  //**********************************************************************************************
1575 
1576  //**********************************************************************************************
1578 
1581  template< typename T1, typename T2, typename T3, typename T4 >
1582  struct UseDoublePrecisionKernel {
1583  enum { value = IsDouble<typename T1::ElementType>::value &&
1584  IsDouble<typename T2::ElementType>::value &&
1585  IsDouble<typename T3::ElementType>::value &&
1586  !IsComplex<T4>::value };
1587  };
1588  //**********************************************************************************************
1589 
1590  //**********************************************************************************************
1592 
1595  template< typename T1, typename T2, typename T3 >
1596  struct UseSinglePrecisionComplexKernel {
1597  typedef complex<float> Type;
1598  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1599  IsSame<typename T2::ElementType,Type>::value &&
1600  IsSame<typename T3::ElementType,Type>::value };
1601  };
1602  //**********************************************************************************************
1603 
1604  //**********************************************************************************************
1606 
1609  template< typename T1, typename T2, typename T3 >
1610  struct UseDoublePrecisionComplexKernel {
1611  typedef complex<double> Type;
1612  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1613  IsSame<typename T2::ElementType,Type>::value &&
1614  IsSame<typename T3::ElementType,Type>::value };
1615  };
1616  //**********************************************************************************************
1617 
1618  //**********************************************************************************************
1620 
1622  template< typename T1, typename T2, typename T3, typename T4 >
1623  struct UseDefaultKernel {
1624  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1625  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1626  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1627  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1628  };
1629  //**********************************************************************************************
1630 
1631  //**********************************************************************************************
1633 
1636  template< typename T1, typename T2, typename T3, typename T4 >
1637  struct UseVectorizedDefaultKernel {
1638  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1639  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1640  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1641  IsSame<typename T1::ElementType,T4>::value &&
1642  IntrinsicTrait<typename T1::ElementType>::addition &&
1643  IntrinsicTrait<typename T1::ElementType>::multiplication };
1644  };
1645  //**********************************************************************************************
1646 
1647  public:
1648  //**Type definitions****************************************************************************
1649  typedef DVecScalarMultExpr<MVM,ST,false> This;
1650  typedef typename MultTrait<RES,ST>::Type ResultType;
1651  typedef typename ResultType::TransposeType TransposeType;
1652  typedef typename ResultType::ElementType ElementType;
1653  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
1654  typedef const ElementType ReturnType;
1655  typedef const ResultType CompositeType;
1656 
1658  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
1659 
1661  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1662 
1664  typedef typename SelectType< evaluate, const MRT, MCT >::Type LT;
1665 
1667  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
1668  //**********************************************************************************************
1669 
1670  //**Compilation flags***************************************************************************
1672  enum { vectorizable = 0 };
1673 
1675  enum { canAlias = CanAlias<MVM>::value };
1676  //**********************************************************************************************
1677 
1678  //**Constructor*********************************************************************************
1684  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1685  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1686  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1687  {}
1688  //**********************************************************************************************
1689 
1690  //**Subscript operator**************************************************************************
1696  inline ReturnType operator[]( size_t index ) const {
1697  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1698  return vector_[index] * scalar_;
1699  }
1700  //**********************************************************************************************
1701 
1702  //**Size function*******************************************************************************
1707  inline size_t size() const {
1708  return vector_.size();
1709  }
1710  //**********************************************************************************************
1711 
1712  //**Left operand access*************************************************************************
1717  inline LeftOperand leftOperand() const {
1718  return vector_;
1719  }
1720  //**********************************************************************************************
1721 
1722  //**Right operand access************************************************************************
1727  inline RightOperand rightOperand() const {
1728  return scalar_;
1729  }
1730  //**********************************************************************************************
1731 
1732  //**********************************************************************************************
1738  template< typename T >
1739  inline bool isAliased( const T* alias ) const {
1740  return CanAlias<MVM>::value && vector_.isAliased( alias );
1741  }
1742  //**********************************************************************************************
1743 
1744  private:
1745  //**Member variables****************************************************************************
1746  LeftOperand vector_;
1747  RightOperand scalar_;
1748  //**********************************************************************************************
1749 
1750  //**Assignment to dense vectors*****************************************************************
1762  template< typename VT1 > // Type of the target dense vector
1763  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1764  {
1765  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1766 
1767  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1768  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1769 
1770  if( left.rows() == 0UL ) {
1771  return;
1772  }
1773  else if( left.columns() == 0UL ) {
1774  reset( ~lhs );
1775  return;
1776  }
1777 
1778  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1779  RT x( right ); // Evaluation of the right-hand side dense vector operand
1780 
1781  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1782  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1783  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1784  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1785 
1786  if( ( IsComputation<MT>::value && !evaluate ) ||
1787  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1788  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1789  else
1790  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1791  }
1792  //**********************************************************************************************
1793 
1794  //**Default assignment to dense vectors*********************************************************
1808  template< typename VT1 // Type of the left-hand side target vector
1809  , typename MT1 // Type of the left-hand side matrix operand
1810  , typename VT2 // Type of the right-hand side vector operand
1811  , typename ST2 > // Type of the scalar value
1812  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1813  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1814  {
1815  const size_t M( A.rows() );
1816  const size_t N( A.columns() );
1817 
1818  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1819  const size_t iend( M & size_t(-2) );
1820 
1821  for( size_t i=0UL; i<M; ++i ) {
1822  y[i] = x[0UL] * A(i,0UL);
1823  }
1824  for( size_t j=1UL; j<N; ++j ) {
1825  for( size_t i=0UL; i<iend; i+=2UL ) {
1826  y[i ] += x[j] * A(i ,j);
1827  y[i+1UL] += x[j] * A(i+1UL,j);
1828  }
1829  if( iend < M ) {
1830  y[iend] += x[j] * A(iend,j);
1831  }
1832  }
1833  for( size_t i=0UL; i<M; ++i ) {
1834  y[i] *= scalar;
1835  }
1836  }
1837  //**********************************************************************************************
1838 
1839  //**Vectorized default assignment to dense vectors**********************************************
1853  template< typename VT1 // Type of the left-hand side target vector
1854  , typename MT1 // Type of the left-hand side matrix operand
1855  , typename VT2 // Type of the right-hand side vector operand
1856  , typename ST2 > // Type of the scalar value
1857  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1858  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1859  {
1860  typedef IntrinsicTrait<ElementType> IT;
1861 
1862  const size_t M( A.spacing() );
1863  const size_t N( A.columns() );
1864 
1865  const IntrinsicType factor( set( scalar ) );
1866 
1867  size_t i( 0UL );
1868 
1869  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1870  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1871  for( size_t j=0UL; j<N; ++j ) {
1872  const IntrinsicType x1( set( x[j] ) );
1873  xmm1 = xmm1 + A.get(i ,j) * x1;
1874  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1875  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1876  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1877  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
1878  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
1879  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
1880  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
1881  }
1882  store( &y[i ], xmm1*factor );
1883  store( &y[i+IT::size ], xmm2*factor );
1884  store( &y[i+IT::size*2UL], xmm3*factor );
1885  store( &y[i+IT::size*3UL], xmm4*factor );
1886  store( &y[i+IT::size*4UL], xmm5*factor );
1887  store( &y[i+IT::size*5UL], xmm6*factor );
1888  store( &y[i+IT::size*6UL], xmm7*factor );
1889  store( &y[i+IT::size*7UL], xmm8*factor );
1890  }
1891  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1892  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1893  for( size_t j=0UL; j<N; ++j ) {
1894  const IntrinsicType x1( set( x[j] ) );
1895  xmm1 = xmm1 + A.get(i ,j) * x1;
1896  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1897  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1898  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1899  }
1900  store( &y[i ], xmm1*factor );
1901  store( &y[i+IT::size ], xmm2*factor );
1902  store( &y[i+IT::size*2UL], xmm3*factor );
1903  store( &y[i+IT::size*3UL], xmm4*factor );
1904  }
1905  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
1906  IntrinsicType xmm1, xmm2, xmm3;
1907  for( size_t j=0UL; j<N; ++j ) {
1908  const IntrinsicType x1( set( x[j] ) );
1909  xmm1 = xmm1 + A.get(i ,j) * x1;
1910  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1911  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1912  }
1913  store( &y[i ], xmm1*factor );
1914  store( &y[i+IT::size ], xmm2*factor );
1915  store( &y[i+IT::size*2UL], xmm3*factor );
1916  }
1917  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1918  IntrinsicType xmm1, xmm2;
1919  for( size_t j=0UL; j<N; ++j ) {
1920  const IntrinsicType x1( set( x[j] ) );
1921  xmm1 = xmm1 + A.get(i ,j) * x1;
1922  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
1923  }
1924  store( &y[i ], xmm1*factor );
1925  store( &y[i+IT::size], xmm2*factor );
1926  }
1927  if( i < M ) {
1928  IntrinsicType xmm1;
1929  for( size_t j=0UL; j<N; ++j ) {
1930  const IntrinsicType x1( set( x[j] ) );
1931  xmm1 = xmm1 + A.get(i,j) * x1;
1932  }
1933  store( &y[i], xmm1*factor );
1934  }
1935  }
1936  //**********************************************************************************************
1937 
1938  //**BLAS-based assignment to dense vectors (default)********************************************
1952  template< typename VT1 // Type of the left-hand side target vector
1953  , typename MT1 // Type of the left-hand side matrix operand
1954  , typename VT2 // Type of the right-hand side vector operand
1955  , typename ST2 > // Type of the scalar value
1956  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1957  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1958  {
1959  selectDefaultAssignKernel( y, A, x, scalar );
1960  }
1961  //**********************************************************************************************
1962 
1963  //**BLAS-based assignment to dense vectors (single precision)***********************************
1964 #if BLAZE_BLAS_MODE
1965 
// Single precision BLAS assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to float at compile time.
// The product is delegated to cblas_sgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector of the assignment
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, passed as the BLAS alpha argument
1978  template< typename VT1 // Type of the left-hand side target vector
1979  , typename MT1 // Type of the left-hand side matrix operand
1980  , typename VT2 // Type of the right-hand side vector operand
1981  , typename ST2 > // Type of the scalar value
1982  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1983  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1984  {
1985  using boost::numeric_cast;
1986 
1987  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1988  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1989  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1990 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
1991  const int M ( numeric_cast<int>( A.rows() ) );
1992  const int N ( numeric_cast<int>( A.columns() ) );
1993  const int lda( numeric_cast<int>( A.spacing() ) );
1994 
      // alpha = scalar, beta = 0.0F: y := scalar * A * x, the previous contents
      // of y do not contribute to the result.
1995  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
1996  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1997  }
1998 #endif
1999  //**********************************************************************************************
2000 
2001  //**BLAS-based assignment to dense vectors (double precision)***********************************
2002 #if BLAZE_BLAS_MODE
2003 
// Double precision BLAS assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to double at compile time.
// The product is delegated to cblas_dgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector of the assignment
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, passed as the BLAS alpha argument
2016  template< typename VT1 // Type of the left-hand side target vector
2017  , typename MT1 // Type of the left-hand side matrix operand
2018  , typename VT2 // Type of the right-hand side vector operand
2019  , typename ST2 > // Type of the scalar value
2020  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2021  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2022  {
2023  using boost::numeric_cast;
2024 
2025  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2026  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2027  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2028 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2029  const int M ( numeric_cast<int>( A.rows() ) );
2030  const int N ( numeric_cast<int>( A.columns() ) );
2031  const int lda( numeric_cast<int>( A.spacing() ) );
2032 
      // alpha = scalar, beta = 0.0: y := scalar * A * x, the previous contents
      // of y do not contribute to the result.
2033  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2034  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2035  }
2036 #endif
2037  //**********************************************************************************************
2038 
2039  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2040 #if BLAZE_BLAS_MODE
2041 
// Single precision complex BLAS assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is float. The product is delegated to cblas_cgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector of the assignment
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, converted to complex<float> for the BLAS call
// NOTE(review): the embedded listing numbers skip 2066 inside this body; a
// statement on that line, if any, is not shown here.
2054  template< typename VT1 // Type of the left-hand side target vector
2055  , typename MT1 // Type of the left-hand side matrix operand
2056  , typename VT2 // Type of the right-hand side vector operand
2057  , typename ST2 > // Type of the scalar value
2058  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2059  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2060  {
2061  using boost::numeric_cast;
2062 
2063  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2064  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2065  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2067  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2068  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2069  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2070 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2071  const int M ( numeric_cast<int>( A.rows() ) );
2072  const int N ( numeric_cast<int>( A.columns() ) );
2073  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. beta = (0,0): y is overwritten with alpha * A * x.
2074  const complex<float> alpha( scalar );
2075  const complex<float> beta ( 0.0F, 0.0F );
2076 
2077  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2078  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2079  }
2080 #endif
2081  //**********************************************************************************************
2082 
2083  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2084 #if BLAZE_BLAS_MODE
2085 
// Double precision complex BLAS assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is double. The product is delegated to cblas_zgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector of the assignment
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, converted to complex<double> for the BLAS call
// NOTE(review): the embedded listing numbers skip 2110 inside this body; a
// statement on that line, if any, is not shown here.
2098  template< typename VT1 // Type of the left-hand side target vector
2099  , typename MT1 // Type of the left-hand side matrix operand
2100  , typename VT2 // Type of the right-hand side vector operand
2101  , typename ST2 > // Type of the scalar value
2102  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2103  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2104  {
2105  using boost::numeric_cast;
2106 
2107  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2108  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2109  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2111  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2112  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2113  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2114 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2115  const int M ( numeric_cast<int>( A.rows() ) );
2116  const int N ( numeric_cast<int>( A.columns() ) );
2117  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. beta = (0,0): y is overwritten with alpha * A * x.
2118  const complex<double> alpha( scalar );
2119  const complex<double> beta ( 0.0, 0.0 );
2120 
2121  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2122  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2123  }
2124 #endif
2125  //**********************************************************************************************
2126 
2127  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product expression to a sparse vector.
// The expression rhs is first evaluated into a temporary of type ResultType;
// that temporary is then handed to assign() for the sparse target.
//   lhs : target sparse vector; its size must equal rhs.size() (internal assert)
//   rhs : the scaled multiplication expression to be assigned
// NOTE(review): the embedded listing numbers skip 2142-2143 inside this body;
// statements on those lines, if any, are not shown here.
2139  template< typename VT1 > // Type of the target sparse vector
2140  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2141  {
2144  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2145 
2146  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2147 
2148  const ResultType tmp( rhs );
2149  assign( ~lhs, tmp );
2150  }
2151  //**********************************************************************************************
2152 
2153  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product expression to a
// dense vector.
// The matrix and vector operands are taken from rhs.vector_ and the scaling
// factor from rhs.scalar_. Both operands are bound to objects of type LT / RT
// (declared outside this excerpt; presumably evaluated or reference types
// depending on the operands -- confirm) before a kernel is selected.
//   lhs : target dense vector; its size must equal rhs.size() (internal assert)
//   rhs : the scaled multiplication expression to be added
2165  template< typename VT1 > // Type of the target dense vector
2166  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2167  {
2168  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2169 
2170  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2171  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2172 
      // A matrix without rows or columns contributes nothing: leave lhs as is.
2173  if( left.rows() == 0UL || left.columns() == 0UL ) {
2174  return;
2175  }
2176 
2177  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2178  RT x( right ); // Evaluation of the right-hand side dense vector operand
2179 
2180  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2181  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2182  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2183  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2184 
      // The default kernel is used when MT is a computation and the `evaluate`
      // flag (defined outside this excerpt) is false, or when the matrix has
      // fewer than TDMATDVECMULT_THRESHOLD elements; otherwise the BLAS kernel
      // overload set is used.
2185  if( ( IsComputation<MT>::value && !evaluate ) ||
2186  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2187  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2188  else
2189  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2190  }
2191  //**********************************************************************************************
2192 
2193  //**Default addition assignment to dense vectors************************************************
// Non-vectorized default kernel for the addition assignment.
// This overload is active when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does
// NOT hold (DisableIf). It builds the expression A * x * scalar and passes it
// to the target's addAssign() member.
//   y      : target vector that is added to
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2207  template< typename VT1 // Type of the left-hand side target vector
2208  , typename MT1 // Type of the left-hand side matrix operand
2209  , typename VT2 // Type of the right-hand side vector operand
2210  , typename ST2 > // Type of the scalar value
2211  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2212  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2213  {
2214  y.addAssign( A * x * scalar );
2215  }
2216  //**********************************************************************************************
2217 
2218  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment.
// This overload is active when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It adds scalar * (A * x) to y using intrinsic vectors of
// IT::size elements each.
// The row index i is advanced in blocks of 8, 4, 3, 2 and finally 1 intrinsic
// vector. For every block all N columns are accumulated into the xmm
// variables first; the sums are then multiplied by `factor` (the scalar
// broadcast through set()) and added to the values loaded from y.
//   y      : target vector that is added to
//   A      : matrix operand, read through A.get(i,j)
//   x      : vector operand, read element-wise
//   scalar : scaling factor
// NOTE(review): the row bound is A.spacing(), not A.rows(); presumably both A
// and y are padded to a multiple of IT::size so that the trailing accesses
// stay inside the allocation -- confirm against the storage classes.
// NOTE(review): the xmm accumulators are declared without an initializer; this
// relies on IntrinsicType default-constructing to zero -- confirm.
2232  template< typename VT1 // Type of the left-hand side target vector
2233  , typename MT1 // Type of the left-hand side matrix operand
2234  , typename VT2 // Type of the right-hand side vector operand
2235  , typename ST2 > // Type of the scalar value
2236  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2237  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2238  {
2239  typedef IntrinsicTrait<ElementType> IT;
2240 
2241  const size_t M( A.spacing() );
2242  const size_t N( A.columns() );
2243 
2244  const IntrinsicType factor( set( scalar ) );
2245 
2246  size_t i( 0UL );
2247 
      // Blocks of 8 intrinsic vectors (8*IT::size rows) per iteration.
2248  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2249  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2250  for( size_t j=0UL; j<N; ++j ) {
2251  const IntrinsicType x1( set( x[j] ) );
2252  xmm1 = xmm1 + A.get(i ,j) * x1;
2253  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2254  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2255  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2256  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2257  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2258  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2259  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2260  }
2261  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2262  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2263  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2264  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2265  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) + xmm5*factor );
2266  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) + xmm6*factor );
2267  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) + xmm7*factor );
2268  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) + xmm8*factor );
2269  }
      // Blocks of 4 intrinsic vectors.
2270  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2271  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2272  for( size_t j=0UL; j<N; ++j ) {
2273  const IntrinsicType x1( set( x[j] ) );
2274  xmm1 = xmm1 + A.get(i ,j) * x1;
2275  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2276  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2277  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2278  }
2279  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2280  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2281  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2282  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2283  }
      // Blocks of 3 intrinsic vectors.
2284  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
2285  IntrinsicType xmm1, xmm2, xmm3;
2286  for( size_t j=0UL; j<N; ++j ) {
2287  const IntrinsicType x1( set( x[j] ) );
2288  xmm1 = xmm1 + A.get(i ,j) * x1;
2289  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2290  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2291  }
2292  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2293  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2294  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2295  }
      // Blocks of 2 intrinsic vectors.
2296  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2297  IntrinsicType xmm1, xmm2;
2298  for( size_t j=0UL; j<N; ++j ) {
2299  const IntrinsicType x1( set( x[j] ) );
2300  xmm1 = xmm1 + A.get(i ,j) * x1;
2301  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2302  }
2303  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2304  store( &y[i+IT::size], load( &y[i+IT::size] ) + xmm2*factor );
2305  }
      // Remainder: a single intrinsic vector starting at row i (assumes M is a
      // multiple of IT::size, so at most one vector is left -- confirm).
2306  if( i < M ) {
2307  IntrinsicType xmm1;
2308  for( size_t j=0UL; j<N; ++j ) {
2309  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2310  }
2311  store( &y[i], load( &y[i] ) + xmm1*factor );
2312  }
2313  }
2314  //**********************************************************************************************
2315 
2316  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Default fallback of the BLAS-based addition assignment kernel.
// This overload is enabled (via EnableIf) only when
// UseDefaultKernel<VT1,MT1,VT2,ST2> holds. No BLAS routine is called on this
// path; all arguments are forwarded unchanged to selectDefaultAddAssignKernel().
//   y      : target vector that is added to
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2330  template< typename VT1 // Type of the left-hand side target vector
2331  , typename MT1 // Type of the left-hand side matrix operand
2332  , typename VT2 // Type of the right-hand side vector operand
2333  , typename ST2 > // Type of the scalar value
2334  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2335  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2336  {
2337  selectDefaultAddAssignKernel( y, A, x, scalar );
2338  }
2339  //**********************************************************************************************
2340 
2341  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2342 #if BLAZE_BLAS_MODE
2343 
// Single precision BLAS addition assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to float at compile time.
// The update is delegated to cblas_sgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector that is added to
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, passed as the BLAS alpha argument
2356  template< typename VT1 // Type of the left-hand side target vector
2357  , typename MT1 // Type of the left-hand side matrix operand
2358  , typename VT2 // Type of the right-hand side vector operand
2359  , typename ST2 > // Type of the scalar value
2360  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2361  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2362  {
2363  using boost::numeric_cast;
2364 
2365  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2366  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2367  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2368 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2369  const int M ( numeric_cast<int>( A.rows() ) );
2370  const int N ( numeric_cast<int>( A.columns() ) );
2371  const int lda( numeric_cast<int>( A.spacing() ) );
2372 
      // alpha = scalar, beta = 1.0F: y := scalar * A * x + y.
2373  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2374  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2375  }
2376 #endif
2377  //**********************************************************************************************
2378 
2379  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2380 #if BLAZE_BLAS_MODE
2381 
// Double precision BLAS addition assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to double at compile time.
// The update is delegated to cblas_dgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector that is added to
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, passed as the BLAS alpha argument
2394  template< typename VT1 // Type of the left-hand side target vector
2395  , typename MT1 // Type of the left-hand side matrix operand
2396  , typename VT2 // Type of the right-hand side vector operand
2397  , typename ST2 > // Type of the scalar value
2398  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2399  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2400  {
2401  using boost::numeric_cast;
2402 
2403  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2404  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2405  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2406 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2407  const int M ( numeric_cast<int>( A.rows() ) );
2408  const int N ( numeric_cast<int>( A.columns() ) );
2409  const int lda( numeric_cast<int>( A.spacing() ) );
2410 
      // alpha = scalar, beta = 1.0: y := scalar * A * x + y.
2411  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2412  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2413  }
2414 #endif
2415  //**********************************************************************************************
2416 
2417  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2418 #if BLAZE_BLAS_MODE
2419 
// Single precision complex BLAS addition assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is float. The update is delegated to cblas_cgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector that is added to
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, converted to complex<float> for the BLAS call
// NOTE(review): the embedded listing numbers skip 2444 inside this body; a
// statement on that line, if any, is not shown here.
2432  template< typename VT1 // Type of the left-hand side target vector
2433  , typename MT1 // Type of the left-hand side matrix operand
2434  , typename VT2 // Type of the right-hand side vector operand
2435  , typename ST2 > // Type of the scalar value
2436  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2437  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2438  {
2439  using boost::numeric_cast;
2440 
2441  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2442  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2443  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2445  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2446  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2447  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2448 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2449  const int M ( numeric_cast<int>( A.rows() ) );
2450  const int N ( numeric_cast<int>( A.columns() ) );
2451  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. beta = (1,0): y := alpha * A * x + y.
2452  const complex<float> alpha( scalar );
2453  const complex<float> beta ( 1.0F, 0.0F );
2454 
2455  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2456  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2457  }
2458 #endif
2459  //**********************************************************************************************
2460 
2461  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2462 #if BLAZE_BLAS_MODE
2463 
// Double precision complex BLAS addition assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is double. The update is delegated to cblas_zgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector that is added to
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor, converted to complex<double> for the BLAS call
// NOTE(review): the embedded listing numbers skip 2488 inside this body; a
// statement on that line, if any, is not shown here.
2476  template< typename VT1 // Type of the left-hand side target vector
2477  , typename MT1 // Type of the left-hand side matrix operand
2478  , typename VT2 // Type of the right-hand side vector operand
2479  , typename ST2 > // Type of the scalar value
2480  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2481  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2482  {
2483  using boost::numeric_cast;
2484 
2485  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2486  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2487  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2489  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2490  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2491  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2492 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2493  const int M ( numeric_cast<int>( A.rows() ) );
2494  const int N ( numeric_cast<int>( A.columns() ) );
2495  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. beta = (1,0): y := alpha * A * x + y.
2496  const complex<double> alpha( scalar );
2497  const complex<double> beta ( 1.0, 0.0 );
2498 
2499  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2500  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2501  }
2502 #endif
2503  //**********************************************************************************************
2504 
2505  //**Addition assignment to sparse vectors*******************************************************
2506  // No special implementation for the addition assignment to sparse vectors.
2507  //**********************************************************************************************
2508 
2509  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product expression to a
// dense vector.
// The matrix and vector operands are taken from rhs.vector_ and the scaling
// factor from rhs.scalar_. Both operands are bound to objects of type LT / RT
// (declared outside this excerpt; presumably evaluated or reference types
// depending on the operands -- confirm) before a kernel is selected.
//   lhs : target dense vector; its size must equal rhs.size() (internal assert)
//   rhs : the scaled multiplication expression to be subtracted
2521  template< typename VT1 > // Type of the target dense vector
2522  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2523  {
2524  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2525 
2526  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2527  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2528 
      // A matrix without rows or columns contributes nothing: leave lhs as is.
2529  if( left.rows() == 0UL || left.columns() == 0UL ) {
2530  return;
2531  }
2532 
2533  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2534  RT x( right ); // Evaluation of the right-hand side dense vector operand
2535 
2536  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2537  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2538  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2539  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2540 
      // The default kernel is used when MT is a computation and the `evaluate`
      // flag (defined outside this excerpt) is false, or when the matrix has
      // fewer than TDMATDVECMULT_THRESHOLD elements; otherwise the BLAS kernel
      // overload set is used.
2541  if( ( IsComputation<MT>::value && !evaluate ) ||
2542  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2543  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2544  else
2545  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2546  }
2547  //**********************************************************************************************
2548 
2549  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel for the subtraction assignment.
// This overload is active when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does
// NOT hold (DisableIf). It builds the expression A * x * scalar and passes it
// to the target's subAssign() member.
//   y      : target vector that is subtracted from
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2563  template< typename VT1 // Type of the left-hand side target vector
2564  , typename MT1 // Type of the left-hand side matrix operand
2565  , typename VT2 // Type of the right-hand side vector operand
2566  , typename ST2 > // Type of the scalar value
2567  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2568  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2569  {
2570  y.subAssign( A * x * scalar );
2571  }
2572  //**********************************************************************************************
2573 
2574  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment.
// This overload is active when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It subtracts scalar * (A * x) from y using intrinsic
// vectors of IT::size elements each.
// The row index i is advanced in blocks of 8, 4, 3, 2 and finally 1 intrinsic
// vector. For every block all N columns are accumulated into the xmm
// variables first; the sums are then multiplied by `factor` (the scalar
// broadcast through set()) and subtracted from the values loaded from y.
//   y      : target vector that is subtracted from
//   A      : matrix operand, read through A.get(i,j)
//   x      : vector operand, read element-wise
//   scalar : scaling factor
// NOTE(review): the row bound is A.spacing(), not A.rows(); presumably both A
// and y are padded to a multiple of IT::size so that the trailing accesses
// stay inside the allocation -- confirm against the storage classes.
// NOTE(review): the xmm accumulators are declared without an initializer; this
// relies on IntrinsicType default-constructing to zero -- confirm.
2588  template< typename VT1 // Type of the left-hand side target vector
2589  , typename MT1 // Type of the left-hand side matrix operand
2590  , typename VT2 // Type of the right-hand side vector operand
2591  , typename ST2 > // Type of the scalar value
2592  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2593  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2594  {
2595  typedef IntrinsicTrait<ElementType> IT;
2596 
2597  const size_t M( A.spacing() );
2598  const size_t N( A.columns() );
2599 
2600  const IntrinsicType factor( set( scalar ) );
2601 
2602  size_t i( 0UL );
2603 
      // Blocks of 8 intrinsic vectors (8*IT::size rows) per iteration.
2604  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2605  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2606  for( size_t j=0UL; j<N; ++j ) {
2607  const IntrinsicType x1( set( x[j] ) );
2608  xmm1 = xmm1 + A.get(i ,j) * x1;
2609  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2610  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2611  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2612  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2613  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2614  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2615  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2616  }
2617  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2618  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2619  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2620  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2621  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) - xmm5*factor );
2622  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) - xmm6*factor );
2623  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) - xmm7*factor );
2624  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) - xmm8*factor );
2625  }
      // Blocks of 4 intrinsic vectors.
2626  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2627  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2628  for( size_t j=0UL; j<N; ++j ) {
2629  const IntrinsicType x1( set( x[j] ) );
2630  xmm1 = xmm1 + A.get(i ,j) * x1;
2631  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2632  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2633  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2634  }
2635  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2636  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2637  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2638  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2639  }
      // Blocks of 3 intrinsic vectors.
2640  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
2641  IntrinsicType xmm1, xmm2, xmm3;
2642  for( size_t j=0UL; j<N; ++j ) {
2643  const IntrinsicType x1( set( x[j] ) );
2644  xmm1 = xmm1 + A.get(i ,j) * x1;
2645  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2646  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2647  }
2648  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2649  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2650  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2651  }
      // Blocks of 2 intrinsic vectors.
2652  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2653  IntrinsicType xmm1, xmm2;
2654  for( size_t j=0UL; j<N; ++j ) {
2655  const IntrinsicType x1( set( x[j] ) );
2656  xmm1 = xmm1 + A.get(i ,j) * x1;
2657  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2658  }
2659  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2660  store( &y[i+IT::size], load( &y[i+IT::size] ) - xmm2*factor );
2661  }
      // Remainder: a single intrinsic vector starting at row i (assumes M is a
      // multiple of IT::size, so at most one vector is left -- confirm).
2662  if( i < M ) {
2663  IntrinsicType xmm1;
2664  for( size_t j=0UL; j<N; ++j ) {
2665  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2666  }
2667  store( &y[i], load( &y[i] ) - xmm1*factor );
2668  }
2669  }
2670  //**********************************************************************************************
2671 
2672  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Default fallback of the BLAS-based subtraction assignment kernel.
// This overload is enabled (via EnableIf) only when
// UseDefaultKernel<VT1,MT1,VT2,ST2> holds. No BLAS routine is called on this
// path; all arguments are forwarded unchanged to selectDefaultSubAssignKernel().
//   y      : target vector that is subtracted from
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2686  template< typename VT1 // Type of the left-hand side target vector
2687  , typename MT1 // Type of the left-hand side matrix operand
2688  , typename VT2 // Type of the right-hand side vector operand
2689  , typename ST2 > // Type of the scalar value
2690  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2691  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2692  {
2693  selectDefaultSubAssignKernel( y, A, x, scalar );
2694  }
2695  //**********************************************************************************************
2696 
2697  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2698 #if BLAZE_BLAS_MODE
2699 
// Single precision BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to float at compile time.
// The update is delegated to cblas_sgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector that is subtracted from
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor; its negation is passed as the BLAS alpha argument
2712  template< typename VT1 // Type of the left-hand side target vector
2713  , typename MT1 // Type of the left-hand side matrix operand
2714  , typename VT2 // Type of the right-hand side vector operand
2715  , typename ST2 > // Type of the scalar value
2716  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2717  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2718  {
2719  using boost::numeric_cast;
2720 
2721  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2722  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2723  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2724 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2725  const int M ( numeric_cast<int>( A.rows() ) );
2726  const int N ( numeric_cast<int>( A.columns() ) );
2727  const int lda( numeric_cast<int>( A.spacing() ) );
2728 
      // alpha = -scalar, beta = 1.0F: y := y - scalar * A * x.
2729  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2730  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2731  }
2732 #endif
2733  //**********************************************************************************************
2734 
2735  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2736 #if BLAZE_BLAS_MODE
2737 
// Double precision BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// The element types of y, A and x are constrained to double at compile time.
// The update is delegated to cblas_dgemv with column-major storage, no
// transposition and unit increments for both vectors.
//   y      : target vector that is subtracted from
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor; its negation is passed as the BLAS alpha argument
2750  template< typename VT1 // Type of the left-hand side target vector
2751  , typename MT1 // Type of the left-hand side matrix operand
2752  , typename VT2 // Type of the right-hand side vector operand
2753  , typename ST2 > // Type of the scalar value
2754  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2755  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2756  {
2757  using boost::numeric_cast;
2758 
2759  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2760  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2762 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2763  const int M ( numeric_cast<int>( A.rows() ) );
2764  const int N ( numeric_cast<int>( A.columns() ) );
2765  const int lda( numeric_cast<int>( A.spacing() ) );
2766 
      // alpha = -scalar, beta = 1.0: y := y - scalar * A * x.
2767  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2768  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2769  }
2770 #endif
2771  //**********************************************************************************************
2772 
2773  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2774 #if BLAZE_BLAS_MODE
2775 
// Single precision complex BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is float. The update is delegated to cblas_cgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector that is subtracted from
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor; its negation is converted to complex<float>
// NOTE(review): the embedded listing numbers skip 2800 inside this body; a
// statement on that line, if any, is not shown here.
2788  template< typename VT1 // Type of the left-hand side target vector
2789  , typename MT1 // Type of the left-hand side matrix operand
2790  , typename VT2 // Type of the right-hand side vector operand
2791  , typename ST2 > // Type of the scalar value
2792  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2793  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2794  {
2795  using boost::numeric_cast;
2796 
2797  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2798  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2801  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2802  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2803  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2804 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2805  const int M ( numeric_cast<int>( A.rows() ) );
2806  const int N ( numeric_cast<int>( A.columns() ) );
2807  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. alpha = -scalar, beta = (1,0): y := y - scalar * A * x.
2808  const complex<float> alpha( -scalar );
2809  const complex<float> beta ( 1.0F, 0.0F );
2810 
2811  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2812  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2813  }
2814 #endif
2815  //**********************************************************************************************
2816 
2817  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2818 #if BLAZE_BLAS_MODE
2819 
// Double precision complex BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds (the scalar type ST2 does not take part in the selection).
// The element types of y, A and x are constrained to complex types whose
// value_type is double. The update is delegated to cblas_zgemv with
// column-major storage, no transposition and unit vector increments.
//   y      : target vector that is subtracted from
//   A      : matrix operand; A.spacing() is passed as the leading dimension
//   x      : vector operand
//   scalar : scaling factor; its negation is converted to complex<double>
// NOTE(review): the embedded listing numbers skip 2844 inside this body; a
// statement on that line, if any, is not shown here.
2832  template< typename VT1 // Type of the left-hand side target vector
2833  , typename MT1 // Type of the left-hand side matrix operand
2834  , typename VT2 // Type of the right-hand side vector operand
2835  , typename ST2 > // Type of the scalar value
2836  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2837  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2838  {
2839  using boost::numeric_cast;
2840 
2841  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2842  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2843  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2845  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2846  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2847  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2848 
      // CBLAS takes int dimensions; the size values are narrowed through
      // boost::numeric_cast (range-checked conversion).
2849  const int M ( numeric_cast<int>( A.rows() ) );
2850  const int N ( numeric_cast<int>( A.columns() ) );
2851  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex routine receives alpha and beta by address, hence the
      // named locals. alpha = -scalar, beta = (1,0): y := y - scalar * A * x.
2852  const complex<double> alpha( -scalar );
2853  const complex<double> beta ( 1.0, 0.0 );
2854 
2855  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2856  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2857  }
2858 #endif
2859  //**********************************************************************************************
2860 
2861  //**Subtraction assignment to sparse vectors****************************************************
2862  // No special implementation for the subtraction assignment to sparse vectors.
2863  //**********************************************************************************************
2864 
2865  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled matrix/vector product expression to
// a dense vector.
// The expression rhs is first evaluated into a temporary of type ResultType;
// that temporary is then handed to multAssign() for the dense target.
//   lhs : target dense vector; its size must equal rhs.size() (internal assert)
//   rhs : the scaled multiplication expression to multiply with
// NOTE(review): the embedded listing numbers skip 2880-2881 inside this body;
// statements on those lines, if any, are not shown here.
2877  template< typename VT1 > // Type of the target dense vector
2878  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2879  {
2882  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2883 
2884  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2885 
2886  const ResultType tmp( rhs );
2887  multAssign( ~lhs, tmp );
2888  }
2889  //**********************************************************************************************
2890 
2891  //**Multiplication assignment to sparse vectors*************************************************
2892  // No special implementation for the multiplication assignment to sparse vectors.
2893  //**********************************************************************************************
2894 
2895  //**Compile time checks*************************************************************************
2903  //**********************************************************************************************
2904 };
2906 //*************************************************************************************************
2907 
2908 
2909 
2910 
2911 //=================================================================================================
2912 //
2913 // GLOBAL BINARY ARITHMETIC OPERATORS
2914 //
2915 //=================================================================================================
2916 
2917 //*************************************************************************************************
2948 template< typename T1 // Type of the left-hand side dense matrix
2949  , typename T2 > // Type of the right-hand side dense vector
2950 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
2952 {
2953  if( (~mat).columns() != (~vec).size() )
2954  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2955 
2956  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
2957 }
2958 //*************************************************************************************************
2959 
2960 } // namespace blaze
2961 
2962 #endif