All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
41 #include <blaze/math/Intrinsics.h>
42 #include <blaze/math/shims/Reset.h>
50 #include <blaze/system/BLAS.h>
52 #include <blaze/util/Assert.h>
53 #include <blaze/util/Complex.h>
59 #include <blaze/util/DisableIf.h>
60 #include <blaze/util/EnableIf.h>
62 #include <blaze/util/SelectType.h>
63 #include <blaze/util/Types.h>
69 
70 
71 namespace blaze {
72 
73 //=================================================================================================
74 //
75 // CLASS DMATDVECMULTEXPR
76 //
77 //=================================================================================================
78 
79 //*************************************************************************************************
86 template< typename MT // Type of the left-hand side dense matrix
87  , typename VT > // Type of the right-hand side dense vector
88 class DMatDVecMultExpr : public DenseVector< DMatDVecMultExpr<MT,VT>, false >
89  , private MatVecMultExpr
90  , private Computation
91 {
92  private:
93  //**Type definitions****************************************************************************
// Private shorthand names for the types derived from the two operand types MT and VT.
94  typedef typename MT::ResultType MRT;  //!< Result type of the left-hand side dense matrix type.
95  typedef typename VT::ResultType VRT;  //!< Result type of the right-hand side dense vector type.
96  typedef typename MRT::ElementType MET;  //!< Element type of the matrix result type.
97  typedef typename VRT::ElementType VET;  //!< Element type of the vector result type.
98  typedef typename MT::CompositeType MCT;  //!< Composite type of the left-hand side dense matrix type.
99  typedef typename VT::CompositeType VCT;  //!< Composite type of the right-hand side dense vector type.
100  //**********************************************************************************************
101 
102  //**********************************************************************************************
// Compile-time switch read by assign(), addAssign() and subAssign(): when MT is a computation
// and this value is zero, those functions always dispatch to the default (non-BLAS) kernel.
// NOTE(review): the initializer below ends in a dangling '&&' -- its remaining operand(s) and
// the closing '};' are missing from this listing. Restore them from the upstream header.
104  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
106  //**********************************************************************************************
107 
108  //**********************************************************************************************
110 
111 
// Compile-time selector that enables the cblas_sgemv based kernels (it is the EnableIf
// condition of the single precision selectBlas*Kernel overloads, instantiated there as
// <target vector, matrix operand, vector operand>).
// NOTE(review): the enum initializer ends in a dangling '&&' -- the listing dropped the lines
// between the vectorizable checks and the closing '};'. By analogy with
// UseDoublePrecisionKernel the missing terms presumably test the three element types for
// 'float'; confirm against the upstream header before relying on this block.
114  template< typename T1, typename T2, typename T3 >
115  struct UseSinglePrecisionKernel {
116  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
120  };
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
126 
127 
130  template< typename T1, typename T2, typename T3 >
131  struct UseDoublePrecisionKernel {
132  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
133  IsDouble<typename T1::ElementType>::value &&
134  IsDouble<typename T2::ElementType>::value &&
135  IsDouble<typename T3::ElementType>::value };
136  };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
143 
146  template< typename T1, typename T2, typename T3 >
147  struct UseSinglePrecisionComplexKernel {
148  typedef complex<float> Type;
149  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
150  IsSame<typename T1::ElementType,Type>::value &&
151  IsSame<typename T2::ElementType,Type>::value &&
152  IsSame<typename T3::ElementType,Type>::value };
153  };
155  //**********************************************************************************************
156 
157  //**********************************************************************************************
159 
160 
163  template< typename T1, typename T2, typename T3 >
164  struct UseDoublePrecisionComplexKernel {
165  typedef complex<double> Type;
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167  IsSame<typename T1::ElementType,Type>::value &&
168  IsSame<typename T2::ElementType,Type>::value &&
169  IsSame<typename T3::ElementType,Type>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
177 
179  template< typename T1, typename T2, typename T3 >
180  struct UseDefaultKernel {
181  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
182  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
183  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
184  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
185  };
187  //**********************************************************************************************
188 
189  //**********************************************************************************************
191 
192 
195  template< typename T1, typename T2, typename T3 >
196  struct UseVectorizedDefaultKernel {
197  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
198  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
199  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
200  IntrinsicTrait<typename T1::ElementType>::addition &&
201  IntrinsicTrait<typename T1::ElementType>::multiplication };
202  };
204  //**********************************************************************************************
205 
206  public:
207  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): ResultType is used below but its own typedef is not part of this listing (the
// listing numbering jumps from 207 to 210); the same holds for the entries between 221 and 226.
// Transpose type taken from ResultType.
210  typedef typename ResultType::TransposeType TransposeType;
// Element type taken from ResultType; also the accumulator type in operator[].
211  typedef typename ResultType::ElementType ElementType;
// Return type of operator[]: elements are returned by (const) value.
213  typedef const ElementType ReturnType;
// Type used when this expression appears as an operand of another expression.
214  typedef const ResultType CompositeType;
215 
// Storage/return type of the matrix operand: either 'const MT' or 'const MT&', chosen by
// SelectType on IsExpression<MT>::value (presumably the first alternative when the condition
// is true -- confirm against SelectType).
217  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
218 
// Storage/return type of the vector operand, chosen the same way from IsExpression<VT>::value.
220  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
221 
224 
// Type of the local vector operand 'x' created in assign()/addAssign()/subAssign(): either
// 'const VRT' or 'VCT', chosen by SelectType on IsComputation<VT>::value.
226  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
227  //**********************************************************************************************
228 
229  //**Compilation flags***************************************************************************
// Compilation flag: this expression type reports itself as not vectorizable (value 0).
231  enum { vectorizable = 0 };
232  //**********************************************************************************************
233 
234  //**Constructor*********************************************************************************
240  explicit inline DMatDVecMultExpr( const MT& mat, const VT& vec )
241  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
242  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
243  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
244  {
245  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
246  }
247  //**********************************************************************************************
248 
249  //**Subscript operator**************************************************************************
255  inline ReturnType operator[]( size_t index ) const {
256  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
257 
258  ElementType res;
259 
260  if( mat_.columns() != 0UL ) {
261  res = mat_(index,0UL) * vec_[0UL];
262  for( size_t j=1UL; j<end_; j+=2UL ) {
263  res += mat_(index,j) * vec_[j] + mat_(index,j+1UL) * vec_[j+1UL];
264  }
265  if( end_ < mat_.columns() ) {
266  res += mat_(index,end_) * vec_[end_];
267  }
268  }
269  else {
270  reset( res );
271  }
272 
273  return res;
274  }
275  //**********************************************************************************************
276 
277  //**Size function*******************************************************************************
282  inline size_t size() const {
283  return mat_.rows();
284  }
285  //**********************************************************************************************
286 
287  //**Left function*******************************************************************************
292  inline LeftOperand leftOperand() const {
293  return mat_;
294  }
295  //**********************************************************************************************
296 
297  //**Right function******************************************************************************
302  inline RightOperand rightOperand() const {
303  return vec_;
304  }
305  //**********************************************************************************************
306 
307  //**********************************************************************************************
313  template< typename T >
314  inline bool canAlias( const T* alias ) const {
315  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
316  }
317  //**********************************************************************************************
318 
319  //**********************************************************************************************
325  template< typename T >
326  inline bool isAliased( const T* alias ) const {
327  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
328  }
329  //**********************************************************************************************
330 
331  private:
332  //**Member variables****************************************************************************
// Upper bound of the two-way unrolled loop in operator[]; computed once in the constructor
// from the number of matrix columns.
// NOTE(review): the declarations of mat_ and vec_ are not part of this listing (numbering
// jumps from 332 to 335); they presumably precede this member -- confirm upstream.
335  const size_t end_;
336  //**********************************************************************************************
337 
338  //**Assignment to dense vectors*****************************************************************
350  template< typename VT1 > // Type of the target dense vector
351  friend inline void assign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
352  {
354 
355  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
356 
357  if( rhs.mat_.rows() == 0UL ) {
358  return;
359  }
360  else if( rhs.mat_.columns() == 0UL ) {
361  reset( ~lhs );
362  return;
363  }
364 
365  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
366  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
367 
368  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
369  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
370  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
371  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
372 
373  if( ( IsComputation<MT>::value && !evaluate ) ||
374  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
375  DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
376  else
377  DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
378  }
380  //**********************************************************************************************
381 
382  //**Default assignment to dense vectors*********************************************************
396  template< typename VT1 // Type of the left-hand side target vector
397  , typename MT1 // Type of the left-hand side matrix operand
398  , typename VT2 > // Type of the right-hand side vector operand
399  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
400  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
401  {
402  y.assign( A * x );
403  }
405  //**********************************************************************************************
406 
407  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the assignment y = A * x. The overload is guarded by EnableIf
// on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
//   y - target vector; each element y[i] is overwritten
//   A - matrix operand, read through A.get(i,j)
//   x - vector operand, read through x.get(j)
// Rows are processed in blocks of 8, 4, 3 and 2, followed by a single remaining row. Every row
// of a block owns one IntrinsicType accumulator that is reduced with sum() after the column loop.
421  template< typename VT1 // Type of the left-hand side target vector
422  , typename MT1 // Type of the left-hand side matrix operand
423  , typename VT2 > // Type of the right-hand side vector operand
424  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
425  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
426  {
427  typedef IntrinsicTrait<ElementType> IT;
428 
429  const size_t M( A.rows() );
430  const size_t N( A.columns() );
431 
432  size_t i( 0UL );
433 
// Blocks of eight rows; the packed x value is loaded once per column step and shared by all rows.
434  for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): the accumulators are not explicitly initialized; this presumably relies on the
// default constructor of IntrinsicType producing zero -- confirm.
435  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
// NOTE(review): j advances by IT::size while only 'j<N' is tested, so the last step can cover
// column indices >= N unless N is a multiple of IT::size -- presumably the operands are padded
// accordingly; confirm.
436  for( size_t j=0UL; j<N; j+=IT::size ) {
437  const IntrinsicType x1( x.get(j) );
438  xmm1 = xmm1 + A.get(i ,j) * x1;
439  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
440  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
441  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
442  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
443  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
444  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
445  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
446  }
447  y[i ] = sum( xmm1 );
448  y[i+1UL] = sum( xmm2 );
449  y[i+2UL] = sum( xmm3 );
450  y[i+3UL] = sum( xmm4 );
451  y[i+4UL] = sum( xmm5 );
452  y[i+5UL] = sum( xmm6 );
453  y[i+6UL] = sum( xmm7 );
454  y[i+7UL] = sum( xmm8 );
455  }
// Blocks of four rows (runs at most once after the eight-row loop).
456  for( ; (i+4UL) <= M; i+=4UL ) {
457  IntrinsicType xmm1, xmm2, xmm3, xmm4;
458  for( size_t j=0UL; j<N; j+=IT::size ) {
459  const IntrinsicType x1( x.get(j) );
460  xmm1 = xmm1 + A.get(i ,j) * x1;
461  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
462  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
463  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
464  }
465  y[i ] = sum( xmm1 );
466  y[i+1UL] = sum( xmm2 );
467  y[i+2UL] = sum( xmm3 );
468  y[i+3UL] = sum( xmm4 );
469  }
// Block of three rows.
470  for( ; (i+3UL) <= M; i+=3UL ) {
471  IntrinsicType xmm1, xmm2, xmm3;
472  for( size_t j=0UL; j<N; j+=IT::size ) {
473  const IntrinsicType x1( x.get(j) );
474  xmm1 = xmm1 + A.get(i ,j) * x1;
475  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
476  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
477  }
478  y[i ] = sum( xmm1 );
479  y[i+1UL] = sum( xmm2 );
480  y[i+2UL] = sum( xmm3 );
481  }
// Block of two rows.
482  for( ; (i+2UL) <= M; i+=2UL ) {
483  IntrinsicType xmm1, xmm2;
484  for( size_t j=0UL; j<N; j+=IT::size ) {
485  const IntrinsicType x1( x.get(j) );
486  xmm1 = xmm1 + A.get(i ,j) * x1;
487  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
488  }
489  y[i ] = sum( xmm1 );
490  y[i+1UL] = sum( xmm2 );
491  }
// Single remaining row, if any.
492  if( i < M ) {
493  IntrinsicType xmm1;
494  for( size_t j=0UL; j<N; j+=IT::size ) {
495  xmm1 = xmm1 + A.get(i,j) * x.get(j);
496  }
497  y[i] = sum( xmm1 );
498  }
499  }
501  //**********************************************************************************************
502 
503  //**BLAS-based assignment to dense vectors (default)********************************************
517  template< typename VT1 // Type of the left-hand side target vector
518  , typename MT1 // Type of the left-hand side matrix operand
519  , typename VT2 > // Type of the right-hand side vector operand
520  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
521  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
522  {
523  selectDefaultAssignKernel( y, A, x );
524  }
526  //**********************************************************************************************
527 
528  //**BLAS-based assignment to dense vectors (single precision)***********************************
529 #if BLAZE_BLAS_MODE
530 
543  template< typename VT1 // Type of the left-hand side target vector
544  , typename MT1 // Type of the left-hand side matrix operand
545  , typename VT2 > // Type of the right-hand side vector operand
546  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
547  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
548  {
549  using boost::numeric_cast;
550 
551  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
552  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
553  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
554 
555  const int M ( numeric_cast<int>( A.rows() ) );
556  const int N ( numeric_cast<int>( A.columns() ) );
557  const int lda( numeric_cast<int>( A.spacing() ) );
558 
559  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
560  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
561  }
563 #endif
564  //**********************************************************************************************
565 
566  //**BLAS-based assignment to dense vectors (double precision)***********************************
567 #if BLAZE_BLAS_MODE
568 
581  template< typename VT1 // Type of the left-hand side target vector
582  , typename MT1 // Type of the left-hand side matrix operand
583  , typename VT2 > // Type of the right-hand side vector operand
584  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
585  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
586  {
587  using boost::numeric_cast;
588 
589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
590  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
591  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
592 
593  const int M ( numeric_cast<int>( A.rows() ) );
594  const int N ( numeric_cast<int>( A.columns() ) );
595  const int lda( numeric_cast<int>( A.spacing() ) );
596 
597  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
598  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
599  }
601 #endif
602  //**********************************************************************************************
603 
604  //**BLAS-based assignment to dense vectors (single precision complex)***************************
605 #if BLAZE_BLAS_MODE
606 
619  template< typename VT1 // Type of the left-hand side target vector
620  , typename MT1 // Type of the left-hand side matrix operand
621  , typename VT2 > // Type of the right-hand side vector operand
622  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
623  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
624  {
625  using boost::numeric_cast;
626 
627  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
628  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
629  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
631  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
632  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
633 
634  const int M ( numeric_cast<int>( A.rows() ) );
635  const int N ( numeric_cast<int>( A.columns() ) );
636  const int lda( numeric_cast<int>( A.spacing() ) );
637  const complex<float> alpha( 1.0F, 0.0F );
638  const complex<float> beta ( 0.0F, 0.0F );
639 
640  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
641  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
642  }
644 #endif
645  //**********************************************************************************************
646 
647  //**BLAS-based assignment to dense vectors (double precision complex)***************************
648 #if BLAZE_BLAS_MODE
649 
662  template< typename VT1 // Type of the left-hand side target vector
663  , typename MT1 // Type of the left-hand side matrix operand
664  , typename VT2 > // Type of the right-hand side vector operand
665  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
666  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
667  {
668  using boost::numeric_cast;
669 
670  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
672  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
674  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
675  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
676 
677  const int M ( numeric_cast<int>( A.rows() ) );
678  const int N ( numeric_cast<int>( A.columns() ) );
679  const int lda( numeric_cast<int>( A.spacing() ) );
680  const complex<double> alpha( 1.0, 0.0 );
681  const complex<double> beta ( 0.0, 0.0 );
682 
683  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
684  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
685  }
687 #endif
688  //**********************************************************************************************
689 
690  //**Assignment to sparse vectors****************************************************************
702  template< typename VT1 > // Type of the target sparse vector
703  friend inline void assign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
704  {
706 
709  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
710 
711  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
712 
713  const ResultType tmp( rhs );
714  assign( ~lhs, tmp );
715  }
717  //**********************************************************************************************
718 
719  //**Addition assignment to dense vectors********************************************************
731  template< typename VT1 > // Type of the target dense vector
732  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
733  {
735 
736  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
737 
738  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
739  return;
740  }
741 
742  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
743  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
744 
745  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
746  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
747  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
748  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
749 
750  if( ( IsComputation<MT>::value && !evaluate ) ||
751  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
752  DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
753  else
754  DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
755  }
757  //**********************************************************************************************
758 
759  //**Default addition assignment to dense vectors************************************************
773  template< typename VT1 // Type of the left-hand side target vector
774  , typename MT1 // Type of the left-hand side matrix operand
775  , typename VT2 > // Type of the right-hand side vector operand
776  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
777  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
778  {
779  y.addAssign( A * x );
780  }
782  //**********************************************************************************************
783 
784  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment y += A * x. The overload is guarded by
// EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
//   y - target vector; the row sums are added to the existing elements
//   A - matrix operand, read through A.get(i,j)
//   x - vector operand, read through x.get(j)
// Rows are processed in blocks of 8, 4, 3 and 2, followed by a single remaining row. Every row
// of a block owns one IntrinsicType accumulator that is reduced with sum() after the column loop.
798  template< typename VT1 // Type of the left-hand side target vector
799  , typename MT1 // Type of the left-hand side matrix operand
800  , typename VT2 > // Type of the right-hand side vector operand
801  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
802  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
803  {
804  typedef IntrinsicTrait<ElementType> IT;
805 
806  const size_t M( A.rows() );
807  const size_t N( A.columns() );
808 
809  size_t i( 0UL );
810 
// Blocks of eight rows; the packed x value is loaded once per column step and shared by all rows.
811  for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): the accumulators are not explicitly initialized; this presumably relies on the
// default constructor of IntrinsicType producing zero -- confirm.
812  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
// NOTE(review): j advances by IT::size while only 'j<N' is tested, so the last step can cover
// column indices >= N unless N is a multiple of IT::size -- presumably the operands are padded
// accordingly; confirm.
813  for( size_t j=0UL; j<N; j+=IT::size ) {
814  const IntrinsicType x1( x.get(j) );
815  xmm1 = xmm1 + A.get(i ,j) * x1;
816  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
817  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
818  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
819  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
820  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
821  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
822  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
823  }
824  y[i ] += sum( xmm1 );
825  y[i+1UL] += sum( xmm2 );
826  y[i+2UL] += sum( xmm3 );
827  y[i+3UL] += sum( xmm4 );
828  y[i+4UL] += sum( xmm5 );
829  y[i+5UL] += sum( xmm6 );
830  y[i+6UL] += sum( xmm7 );
831  y[i+7UL] += sum( xmm8 );
832  }
// Blocks of four rows (runs at most once after the eight-row loop).
833  for( ; (i+4UL) <= M; i+=4UL ) {
834  IntrinsicType xmm1, xmm2, xmm3, xmm4;
835  for( size_t j=0UL; j<N; j+=IT::size ) {
836  const IntrinsicType x1( x.get(j) );
837  xmm1 = xmm1 + A.get(i ,j) * x1;
838  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
839  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
840  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
841  }
842  y[i ] += sum( xmm1 );
843  y[i+1UL] += sum( xmm2 );
844  y[i+2UL] += sum( xmm3 );
845  y[i+3UL] += sum( xmm4 );
846  }
// Block of three rows.
847  for( ; (i+3UL) <= M; i+=3UL ) {
848  IntrinsicType xmm1, xmm2, xmm3;
849  for( size_t j=0UL; j<N; j+=IT::size ) {
850  const IntrinsicType x1( x.get(j) );
851  xmm1 = xmm1 + A.get(i ,j) * x1;
852  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
853  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
854  }
855  y[i ] += sum( xmm1 );
856  y[i+1UL] += sum( xmm2 );
857  y[i+2UL] += sum( xmm3 );
858  }
// Block of two rows.
859  for( ; (i+2UL) <= M; i+=2UL ) {
860  IntrinsicType xmm1, xmm2;
861  for( size_t j=0UL; j<N; j+=IT::size ) {
862  const IntrinsicType x1( x.get(j) );
863  xmm1 = xmm1 + A.get(i ,j) * x1;
864  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
865  }
866  y[i ] += sum( xmm1 );
867  y[i+1UL] += sum( xmm2 );
868  }
// Single remaining row, if any.
869  if( i < M ) {
870  IntrinsicType xmm1;
871  for( size_t j=0UL; j<N; j+=IT::size ) {
872  xmm1 = xmm1 + A.get(i,j) * x.get(j);
873  }
874  y[i] += sum( xmm1 );
875  }
876  }
878  //**********************************************************************************************
879 
880  //**BLAS-based addition assignment to dense vectors (default)***********************************
894  template< typename VT1 // Type of the left-hand side target vector
895  , typename MT1 // Type of the left-hand side matrix operand
896  , typename VT2 > // Type of the right-hand side vector operand
897  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
898  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
899  {
900  selectDefaultAddAssignKernel( y, A, x );
901  }
903  //**********************************************************************************************
904 
905  //**BLAS-based addition assignment to dense vectors (single precision)**************************
906 #if BLAZE_BLAS_MODE
907 
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename MT1 // Type of the left-hand side matrix operand
922  , typename VT2 > // Type of the right-hand side vector operand
923  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
924  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
925  {
926  using boost::numeric_cast;
927 
928  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
929  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
930  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
931 
932  const int M ( numeric_cast<int>( A.rows() ) );
933  const int N ( numeric_cast<int>( A.columns() ) );
934  const int lda( numeric_cast<int>( A.spacing() ) );
935 
936  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
937  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
938  }
940 #endif
941  //**********************************************************************************************
942 
943  //**BLAS-based addition assignment to dense vectors (double precision)**************************
944 #if BLAZE_BLAS_MODE
945 
958  template< typename VT1 // Type of the left-hand side target vector
959  , typename MT1 // Type of the left-hand side matrix operand
960  , typename VT2 > // Type of the right-hand side vector operand
961  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
962  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
963  {
964  using boost::numeric_cast;
965 
966  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
967  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
968  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
969 
970  const int M ( numeric_cast<int>( A.rows() ) );
971  const int N ( numeric_cast<int>( A.columns() ) );
972  const int lda( numeric_cast<int>( A.spacing() ) );
973 
974  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
975  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
976  }
978 #endif
979  //**********************************************************************************************
980 
981  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
982 #if BLAZE_BLAS_MODE
983 
996  template< typename VT1 // Type of the left-hand side target vector
997  , typename MT1 // Type of the left-hand side matrix operand
998  , typename VT2 > // Type of the right-hand side vector operand
999  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1000  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1001  {
1002  using boost::numeric_cast;
1003 
1004  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1005  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1006  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1007  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1008  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1009  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1010 
1011  const int M ( numeric_cast<int>( A.rows() ) );
1012  const int N ( numeric_cast<int>( A.columns() ) );
1013  const int lda( numeric_cast<int>( A.spacing() ) );
1014  const complex<float> alpha( 1.0F, 0.0F );
1015  const complex<float> beta ( 1.0F, 0.0F );
1016 
1017  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1018  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1019  }
1021 #endif
1022  //**********************************************************************************************
1023 
1024  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1025 #if BLAZE_BLAS_MODE
1026 
// BLAS-based addition assignment kernel for complex<double> operands: y += A * x.
// Only participates in overload resolution when
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2> is satisfied (EnableIf).
//   y  target vector, updated in place through y.data()
//   A  matrix operand, read through A.data() with leading dimension A.spacing()
//   x  vector operand, read through x.data()
// The dimensions are narrowed to int with boost::numeric_cast, which reports a
// failing conversion by throwing instead of truncating silently.
1039  template< typename VT1 // Type of the left-hand side target vector
1040  , typename MT1 // Type of the left-hand side matrix operand
1041  , typename VT2 > // Type of the right-hand side vector operand
1042  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1043  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1044  {
1045  using boost::numeric_cast;
1046 
      // Compile-time guard: all three element types must be complex<double>.
1047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1048  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1049  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1051  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1052  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1053 
1054  const int M ( numeric_cast<int>( A.rows() ) );
1055  const int N ( numeric_cast<int>( A.columns() ) );
      // NOTE(review): spacing() is presumably the padded row stride of A - confirm.
1056  const int lda( numeric_cast<int>( A.spacing() ) );
      // gemv computes y := alpha*A*x + beta*y; alpha = 1 and beta = 1 yield y += A*x.
1057  const complex<double> alpha( 1.0, 0.0 );
1058  const complex<double> beta ( 1.0, 0.0 );
1059 
      // Row-major, non-transposed A; both vectors are accessed with unit stride.
1060  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1061  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1062  }
1064 #endif
1065  //**********************************************************************************************
1066 
1067  //**Addition assignment to sparse vectors*******************************************************
1068  // No special implementation for the addition assignment to sparse vectors.
1069  //**********************************************************************************************
1070 
1071  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of a dense matrix-dense vector multiplication to a
// dense vector: lhs -= rhs.mat_ * rhs.vec_.
//   lhs  target dense vector; its size has to match rhs.size() (internal assert)
//   rhs  multiplication expression to be subtracted
// Returns without touching lhs when the matrix has no rows or no columns.
1083  template< typename VT1 > // Type of the target dense vector
1084  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1085  {
1087 
1088  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1089 
      // An empty product contributes nothing, so lhs stays unchanged.
1090  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1091  return;
1092  }
1093 
1094  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1095  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1096 
1097  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1098  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1099  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1100  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1101 
      // Use the default kernel when the matrix operand is an unevaluated
      // computation or when the matrix holds fewer elements than
      // DMATDVECMULT_THRESHOLD; otherwise dispatch to the BLAS kernel family.
1102  if( ( IsComputation<MT>::value && !evaluate ) ||
1103  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1104  DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1105  else
1106  DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1107  }
1109  //**********************************************************************************************
1110 
1111  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel: y -= A * x.
// Chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2> is NOT satisfied (DisableIf).
// The work is delegated to y.subAssign() applied to the expression A * x.
1125  template< typename VT1 // Type of the left-hand side target vector
1126  , typename MT1 // Type of the left-hand side matrix operand
1127  , typename VT2 > // Type of the right-hand side vector operand
1128  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1129  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1130  {
1131  y.subAssign( A * x );
1132  }
1134  //**********************************************************************************************
1135 
1136  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel: y -= A * x.
// Chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2> is satisfied (EnableIf).
// Rows of A are processed in blocks of 8, then 4, 3 and 2, and finally a single
// leftover row. For each row an intrinsic accumulator collects the products
// A.get(i,j) * x.get(j) over all columns (stepping by IT::size); the result of
// sum() on that accumulator is subtracted from y[i].
// NOTE(review): the accumulators are never zeroed explicitly, so this relies on
// the default constructor of IntrinsicType producing zero - confirm.
// NOTE(review): the column loops have no remainder handling; presumably rows of
// A and the vector x are padded to a multiple of IT::size - confirm.
1150  template< typename VT1 // Type of the left-hand side target vector
1151  , typename MT1 // Type of the left-hand side matrix operand
1152  , typename VT2 > // Type of the right-hand side vector operand
1153  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1154  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1155  {
1156  typedef IntrinsicTrait<ElementType> IT;
1157 
1158  const size_t M( A.rows() );
1159  const size_t N( A.columns() );
1160 
1161  size_t i( 0UL );
1162 
      // Blocks of eight rows: each loaded x1 is reused for eight rows of A.
1163  for( ; (i+8UL) <= M; i+=8UL ) {
1164  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1165  for( size_t j=0UL; j<N; j+=IT::size ) {
1166  const IntrinsicType x1( x.get(j) );
1167  xmm1 = xmm1 + A.get(i ,j) * x1;
1168  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1169  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1170  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1171  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1172  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1173  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1174  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1175  }
1176  y[i ] -= sum( xmm1 );
1177  y[i+1UL] -= sum( xmm2 );
1178  y[i+2UL] -= sum( xmm3 );
1179  y[i+3UL] -= sum( xmm4 );
1180  y[i+4UL] -= sum( xmm5 );
1181  y[i+5UL] -= sum( xmm6 );
1182  y[i+6UL] -= sum( xmm7 );
1183  y[i+7UL] -= sum( xmm8 );
1184  }
      // Blocks of four rows (at most seven rows are left at this point).
1185  for( ; (i+4UL) <= M; i+=4UL ) {
1186  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1187  for( size_t j=0UL; j<N; j+=IT::size ) {
1188  const IntrinsicType x1( x.get(j) );
1189  xmm1 = xmm1 + A.get(i ,j) * x1;
1190  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1191  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1192  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1193  }
1194  y[i ] -= sum( xmm1 );
1195  y[i+1UL] -= sum( xmm2 );
1196  y[i+2UL] -= sum( xmm3 );
1197  y[i+3UL] -= sum( xmm4 );
1198  }
      // Block of three rows (at most three rows are left at this point).
1199  for( ; (i+3UL) <= M; i+=3UL ) {
1200  IntrinsicType xmm1, xmm2, xmm3;
1201  for( size_t j=0UL; j<N; j+=IT::size ) {
1202  const IntrinsicType x1( x.get(j) );
1203  xmm1 = xmm1 + A.get(i ,j) * x1;
1204  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1205  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1206  }
1207  y[i ] -= sum( xmm1 );
1208  y[i+1UL] -= sum( xmm2 );
1209  y[i+2UL] -= sum( xmm3 );
1210  }
      // Block of two rows.
1211  for( ; (i+2UL) <= M; i+=2UL ) {
1212  IntrinsicType xmm1, xmm2;
1213  for( size_t j=0UL; j<N; j+=IT::size ) {
1214  const IntrinsicType x1( x.get(j) );
1215  xmm1 = xmm1 + A.get(i ,j) * x1;
1216  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1217  }
1218  y[i ] -= sum( xmm1 );
1219  y[i+1UL] -= sum( xmm2 );
1220  }
      // Single remaining row, if any.
1221  if( i < M ) {
1222  IntrinsicType xmm1;
1223  for( size_t j=0UL; j<N; j+=IT::size ) {
1224  xmm1 = xmm1 + A.get(i,j) * x.get(j);
1225  }
1226  y[i] -= sum( xmm1 );
1227  }
1228  }
1230  //**********************************************************************************************
1231 
1232  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// BLAS-path fallback for the subtraction assignment: y -= A * x.
// Chosen when UseDefaultKernel<VT1,MT1,VT2> is satisfied (EnableIf); it simply
// forwards to selectDefaultSubAssignKernel() with the same arguments.
1246  template< typename VT1 // Type of the left-hand side target vector
1247  , typename MT1 // Type of the left-hand side matrix operand
1248  , typename VT2 > // Type of the right-hand side vector operand
1249  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1250  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1251  {
1252  selectDefaultSubAssignKernel( y, A, x );
1253  }
1255  //**********************************************************************************************
1256 
1257  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1258 #if BLAZE_BLAS_MODE
1259 
// BLAS-based subtraction assignment kernel for float operands: y -= A * x.
// Chosen when UseSinglePrecisionKernel<VT1,MT1,VT2> is satisfied (EnableIf).
//   y  target vector, updated in place through y.data()
//   A  matrix operand, read through A.data() with leading dimension A.spacing()
//   x  vector operand, read through x.data()
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1272  template< typename VT1 // Type of the left-hand side target vector
1273  , typename MT1 // Type of the left-hand side matrix operand
1274  , typename VT2 > // Type of the right-hand side vector operand
1275  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1276  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1277  {
1278  using boost::numeric_cast;
1279 
1280  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1281  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1282  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1283 
1284  const int M ( numeric_cast<int>( A.rows() ) );
1285  const int N ( numeric_cast<int>( A.columns() ) );
1286  const int lda( numeric_cast<int>( A.spacing() ) );
1287 
      // gemv computes y := alpha*A*x + beta*y; alpha = -1 and beta = 1 yield y -= A*x.
1288  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1289  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1290  }
1292 #endif
1293  //**********************************************************************************************
1294 
1295  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1296 #if BLAZE_BLAS_MODE
1297 
// BLAS-based subtraction assignment kernel for double operands: y -= A * x.
// Chosen when UseDoublePrecisionKernel<VT1,MT1,VT2> is satisfied (EnableIf).
//   y  target vector, updated in place through y.data()
//   A  matrix operand, read through A.data() with leading dimension A.spacing()
//   x  vector operand, read through x.data()
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1310  template< typename VT1 // Type of the left-hand side target vector
1311  , typename MT1 // Type of the left-hand side matrix operand
1312  , typename VT2 > // Type of the right-hand side vector operand
1313  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1314  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1315  {
1316  using boost::numeric_cast;
1317 
1318  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1319  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1320  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1321 
1322  const int M ( numeric_cast<int>( A.rows() ) );
1323  const int N ( numeric_cast<int>( A.columns() ) );
1324  const int lda( numeric_cast<int>( A.spacing() ) );
1325 
      // gemv computes y := alpha*A*x + beta*y; alpha = -1 and beta = 1 yield y -= A*x.
1326  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1327  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1328  }
1330 #endif
1331  //**********************************************************************************************
1332 
1333  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1334 #if BLAZE_BLAS_MODE
1335 
// BLAS-based subtraction assignment kernel for complex<float> operands: y -= A * x.
// Chosen when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> is satisfied (EnableIf).
//   y  target vector, updated in place through y.data()
//   A  matrix operand, read through A.data() with leading dimension A.spacing()
//   x  vector operand, read through x.data()
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1348  template< typename VT1 // Type of the left-hand side target vector
1349  , typename MT1 // Type of the left-hand side matrix operand
1350  , typename VT2 > // Type of the right-hand side vector operand
1351  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1352  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1353  {
1354  using boost::numeric_cast;
1355 
      // Compile-time guard: all three element types must be complex<float>.
1356  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1357  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1358  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1359  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1360  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1361  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1362 
1363  const int M ( numeric_cast<int>( A.rows() ) );
1364  const int N ( numeric_cast<int>( A.columns() ) );
1365  const int lda( numeric_cast<int>( A.spacing() ) );
      // gemv computes y := alpha*A*x + beta*y; alpha = -1 and beta = 1 yield y -= A*x.
1366  const complex<float> alpha( -1.0F, 0.0F );
1367  const complex<float> beta ( 1.0F, 0.0F );
1368 
1369  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1370  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1371  }
1373 #endif
1374  //**********************************************************************************************
1375 
1376  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1377 #if BLAZE_BLAS_MODE
1378 
// BLAS-based subtraction assignment kernel for complex<double> operands: y -= A * x.
// Chosen when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> is satisfied (EnableIf).
//   y  target vector, updated in place through y.data()
//   A  matrix operand, read through A.data() with leading dimension A.spacing()
//   x  vector operand, read through x.data()
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1391  template< typename VT1 // Type of the left-hand side target vector
1392  , typename MT1 // Type of the left-hand side matrix operand
1393  , typename VT2 > // Type of the right-hand side vector operand
1394  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1395  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1396  {
1397  using boost::numeric_cast;
1398 
      // Compile-time guard: all three element types must be complex<double>.
1399  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1400  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1401  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1402  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1403  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1404  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1405 
1406  const int M ( numeric_cast<int>( A.rows() ) );
1407  const int N ( numeric_cast<int>( A.columns() ) );
1408  const int lda( numeric_cast<int>( A.spacing() ) );
      // gemv computes y := alpha*A*x + beta*y; alpha = -1 and beta = 1 yield y -= A*x.
1409  const complex<double> alpha( -1.0, 0.0 );
1410  const complex<double> beta ( 1.0, 0.0 );
1411 
1412  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1413  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1414  }
1416 #endif
1417  //**********************************************************************************************
1418 
1419  //**Subtraction assignment to sparse vectors****************************************************
1420  // No special implementation for the subtraction assignment to sparse vectors.
1421  //**********************************************************************************************
1422 
1423  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a dense matrix-dense vector multiplication to a
// dense vector.
//   lhs  target dense vector; its size has to match rhs.size() (internal assert)
//   rhs  multiplication expression acting as the right-hand side operand
// There is no dedicated kernel: the expression is first evaluated into a
// temporary of type ResultType, which is then passed on to multAssign().
1435  template< typename VT1 > // Type of the target dense vector
1436  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1437  {
1439 
1442  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1443 
1444  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1445 
1446  const ResultType tmp( rhs );
1447  multAssign( ~lhs, tmp );
1448  }
1450  //**********************************************************************************************
1451 
1452  //**Multiplication assignment to sparse vectors*************************************************
1453  // No special implementation for the multiplication assignment to sparse vectors.
1454  //**********************************************************************************************
1455 
1456  //**Compile time checks*************************************************************************
1463  //**********************************************************************************************
1464 };
1465 //*************************************************************************************************
1466 
1467 
1468 
1469 
1470 //=================================================================================================
1471 //
1472 // DVECSCALARMULTEXPR SPECIALIZATION
1473 //
1474 //=================================================================================================
1475 
1476 //*************************************************************************************************
1484 template< typename MT // Type of the left-hand side dense matrix
1485  , typename VT // Type of the right-hand side dense vector
1486  , typename ST > // Type of the scalar value
1487 class DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >
1488  : public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1489  , private VecScalarMultExpr
1490  , private Computation
1491 {
1492  private:
1493  //**Type definitions****************************************************************************
1494  typedef DMatDVecMultExpr<MT,VT> MVM; // Type of the wrapped matrix/vector multiplication expression
1495  typedef typename MVM::ResultType RES; // Result type of the matrix/vector multiplication expression
1496  typedef typename MT::ResultType MRT; // Result type of the matrix operand
1497  typedef typename VT::ResultType VRT; // Result type of the vector operand
1498  typedef typename MRT::ElementType MET; // Element type of the matrix operand's result type
1499  typedef typename VRT::ElementType VET; // Element type of the vector operand's result type
1500  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
1501  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
1502  //**********************************************************************************************
1503 
1504  //**********************************************************************************************
// Compile-time switch for the evaluation strategy of the matrix operand.
// It is non-zero when MT is a computation, MT is not vectorizable, and the
// matrix and vector element types are identical and BLAS compatible. The flag
// selects the LT typedef (MRT instead of MCT) and takes part in the kernel
// dispatch of assign().
1506  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1507  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1508  //**********************************************************************************************
1509 
1510  //**********************************************************************************************
1512 
// Compile-time predicate for the single precision (float) BLAS kernels.
// value is non-zero when T1, T2 and T3 are all vectorizable, each has float as
// element type, and the scalar type T4 is not a complex type.
1515  template< typename T1, typename T2, typename T3, typename T4 >
1516  struct UseSinglePrecisionKernel {
1517  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1518  IsFloat<typename T1::ElementType>::value &&
1519  IsFloat<typename T2::ElementType>::value &&
1520  IsFloat<typename T3::ElementType>::value &&
1521  !IsComplex<T4>::value };
1522  };
1523  //**********************************************************************************************
1524 
1525  //**********************************************************************************************
1527 
// Compile-time predicate for the double precision BLAS kernels.
// value is non-zero when T1, T2 and T3 are all vectorizable, each has double as
// element type, and the scalar type T4 is not a complex type.
1530  template< typename T1, typename T2, typename T3, typename T4 >
1531  struct UseDoublePrecisionKernel {
1532  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1533  IsDouble<typename T1::ElementType>::value &&
1534  IsDouble<typename T2::ElementType>::value &&
1535  IsDouble<typename T3::ElementType>::value &&
1536  !IsComplex<T4>::value };
1537  };
1538  //**********************************************************************************************
1539 
1540  //**********************************************************************************************
1542 
// Compile-time predicate for the single precision complex BLAS kernels.
// value is non-zero when T1, T2 and T3 are all vectorizable and each has
// complex<float> as element type. The scalar type is not part of this check.
1545  template< typename T1, typename T2, typename T3 >
1546  struct UseSinglePrecisionComplexKernel {
1547  typedef complex<float> Type;
1548  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1549  IsSame<typename T1::ElementType,Type>::value &&
1550  IsSame<typename T2::ElementType,Type>::value &&
1551  IsSame<typename T3::ElementType,Type>::value };
1552  };
1553  //**********************************************************************************************
1554 
1555  //**********************************************************************************************
1557 
// Compile-time predicate for the double precision complex BLAS kernels.
// value is non-zero when T1, T2 and T3 are all vectorizable and each has
// complex<double> as element type. The scalar type is not part of this check.
1560  template< typename T1, typename T2, typename T3 >
1561  struct UseDoublePrecisionComplexKernel {
1562  typedef complex<double> Type;
1563  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1564  IsSame<typename T1::ElementType,Type>::value &&
1565  IsSame<typename T2::ElementType,Type>::value &&
1566  IsSame<typename T3::ElementType,Type>::value };
1567  };
1568  //**********************************************************************************************
1569 
1570  //**********************************************************************************************
1572 
// Compile-time predicate for falling back to the default kernels.
// value is non-zero when BLAZE_BLAS_MODE is disabled, or when none of the four
// BLAS kernel predicates (float, double, complex<float>, complex<double>)
// applies to the given type combination.
1574  template< typename T1, typename T2, typename T3, typename T4 >
1575  struct UseDefaultKernel {
1576  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1577  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1578  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1579  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1580  };
1581  //**********************************************************************************************
1582 
1583  //**********************************************************************************************
1585 
// Compile-time predicate for the vectorized default kernels.
// value is non-zero when T1, T2 and T3 are all vectorizable, T2, T3 and the
// scalar type T4 all match the element type of T1, and IntrinsicTrait reports
// both addition and multiplication for that element type.
1588  template< typename T1, typename T2, typename T3, typename T4 >
1589  struct UseVectorizedDefaultKernel {
1590  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1591  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1592  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1593  IsSame<typename T1::ElementType,T4>::value &&
1594  IntrinsicTrait<typename T1::ElementType>::addition &&
1595  IntrinsicTrait<typename T1::ElementType>::multiplication };
1596  };
1597  //**********************************************************************************************
1598 
1599  public:
1600  //**Type definitions****************************************************************************
1601  typedef DVecScalarMultExpr<MVM,ST,false> This; // Type of this DVecScalarMultExpr instance
1602  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type obtained from MultTrait for RES and ST
1603  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type
1604  typedef typename ResultType::ElementType ElementType; // Element type of the result type
1605  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type associated with ElementType
1606  typedef const ElementType ReturnType; // Return type of the subscript operator (by value)
1607  typedef const ResultType CompositeType; // Type used when this expression is an operand of another expression
1608 
      // Type of the left-hand side operand: the matrix/vector multiplication expression.
1610  typedef const DMatDVecMultExpr<MT,VT> LeftOperand;
1611 
      // Type of the right-hand side operand: the scalar.
1613  typedef ST RightOperand;
1614 
      // Type used to hold the matrix operand inside the kernels: the evaluated
      // result type if 'evaluate' is set, the composite type otherwise.
1616  typedef typename SelectType< evaluate, const MRT, MCT >::Type LT;
1617 
      // Type used to hold the vector operand inside the kernels: the evaluated
      // result type if VT is a computation, the composite type otherwise.
1619  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
1620  //**********************************************************************************************
1621 
1622  //**Compilation flags***************************************************************************
// Compilation flag: this expression type declares itself as not vectorizable.
1624  enum { vectorizable = 0 };
1625  //**********************************************************************************************
1626 
1627  //**Constructor*********************************************************************************
// Constructor of the scaled matrix/vector multiplication expression.
//   vector  the dense matrix-dense vector multiplication expression (left operand)
//   scalar  the scalar the expression is multiplied with (right operand)
1633  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1634  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1635  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1636  {}
1637  //**********************************************************************************************
1638 
1639  //**Subscript operator**************************************************************************
// Subscript operator for direct element access.
//   index  access index; has to be smaller than vector_.size() (internal assert)
// Returns the element of the wrapped expression at 'index' multiplied by the scalar.
1645  inline ReturnType operator[]( size_t index ) const {
1646  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1647  return vector_[index] * scalar_;
1648  }
1649  //**********************************************************************************************
1650 
1651  //**Size function*******************************************************************************
// Returns the size of the expression, which equals the size of the wrapped
// matrix/vector multiplication expression.
1656  inline size_t size() const {
1657  return vector_.size();
1658  }
1659  //**********************************************************************************************
1660 
1661  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped matrix/vector
// multiplication expression.
1666  inline LeftOperand leftOperand() const {
1667  return vector_;
1668  }
1669  //**********************************************************************************************
1670 
1671  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar.
1676  inline RightOperand rightOperand() const {
1677  return scalar_;
1678  }
1679  //**********************************************************************************************
1680 
1681  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//   alias  the address to check
// The query is forwarded unchanged to the wrapped multiplication expression.
1687  template< typename T >
1688  inline bool canAlias( const T* alias ) const {
1689  return vector_.canAlias( alias );
1690  }
1691  //**********************************************************************************************
1692 
1693  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//   alias  the address to check
// The query is forwarded unchanged to the wrapped multiplication expression.
1699  template< typename T >
1700  inline bool isAliased( const T* alias ) const {
1701  return vector_.isAliased( alias );
1702  }
1703  //**********************************************************************************************
1704 
1705  private:
1706  //**Member variables****************************************************************************
1707  LeftOperand vector_; // Left-hand side matrix/vector multiplication expression (held by value)
1708  RightOperand scalar_; // Right-hand side scalar of the multiplication expression
1709  //**********************************************************************************************
1710 
1711  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled dense matrix-dense vector multiplication to a dense
// vector: lhs = scalar * ( A * x ).
//   lhs  target dense vector; its size has to match rhs.size() (internal assert)
//   rhs  scaled multiplication expression to be assigned
// A matrix without rows leaves lhs untouched; a matrix without columns resets lhs.
1723  template< typename VT1 > // Type of the target dense vector
1724  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1725  {
1727 
1728  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1729 
      // Unpack the matrix and vector operands of the wrapped multiplication.
1730  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1731  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1732 
      // No rows: nothing to assign. No columns: every result element is an
      // empty sum, so the target is reset.
1733  if( left.rows() == 0UL ) {
1734  return;
1735  }
1736  else if( left.columns() == 0UL ) {
1737  reset( ~lhs );
1738  return;
1739  }
1740 
1741  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1742  RT x( right ); // Evaluation of the right-hand side dense vector operand
1743 
1744  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1745  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1746  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1747  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1748 
      // Use the default kernel when the matrix operand is an unevaluated
      // computation or when the matrix holds fewer elements than
      // DMATDVECMULT_THRESHOLD; otherwise dispatch to the BLAS kernel family.
1749  if( ( IsComputation<MT>::value && !evaluate ) ||
1750  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1751  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1752  else
1753  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1754  }
1755  //**********************************************************************************************
1756 
1757  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) assignment kernel: y = A * x * scalar.
// Chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is NOT satisfied
// (DisableIf). The work is delegated to y.assign() applied to the expression
// A * x * scalar.
1771  template< typename VT1 // Type of the left-hand side target vector
1772  , typename MT1 // Type of the left-hand side matrix operand
1773  , typename VT2 // Type of the right-hand side vector operand
1774  , typename ST2 > // Type of the scalar value
1775  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1776  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1777  {
1778  y.assign( A * x * scalar );
1779  }
1780  //**********************************************************************************************
1781 
1782  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment kernel: y = ( A * x ) * scalar.
// Chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf).
// Rows of A are processed in blocks of 8, then 4, 3 and 2, and finally a single
// leftover row. For each row an intrinsic accumulator collects the products
// A.get(i,j) * x.get(j) over all columns (stepping by IT::size); the result of
// sum() on that accumulator is scaled and stored in y[i]. The scalar is applied
// once per row after the reduction, not inside the column loop.
// NOTE(review): the accumulators are never zeroed explicitly, so this relies on
// the default constructor of IntrinsicType producing zero - confirm.
// NOTE(review): the column loops have no remainder handling; presumably rows of
// A and the vector x are padded to a multiple of IT::size - confirm.
1796  template< typename VT1 // Type of the left-hand side target vector
1797  , typename MT1 // Type of the left-hand side matrix operand
1798  , typename VT2 // Type of the right-hand side vector operand
1799  , typename ST2 > // Type of the scalar value
1800  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1801  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1802  {
1803  typedef IntrinsicTrait<ElementType> IT;
1804 
1805  const size_t M( A.rows() );
1806  const size_t N( A.columns() );
1807 
1808  size_t i( 0UL );
1809 
      // Blocks of eight rows: each loaded x1 is reused for eight rows of A.
1810  for( ; (i+8UL) <= M; i+=8UL ) {
1811  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1812  for( size_t j=0UL; j<N; j+=IT::size ) {
1813  const IntrinsicType x1( x.get(j) );
1814  xmm1 = xmm1 + A.get(i ,j) * x1;
1815  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1816  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1817  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1818  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1819  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1820  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1821  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1822  }
1823  y[i ] = sum( xmm1 ) * scalar;
1824  y[i+1UL] = sum( xmm2 ) * scalar;
1825  y[i+2UL] = sum( xmm3 ) * scalar;
1826  y[i+3UL] = sum( xmm4 ) * scalar;
1827  y[i+4UL] = sum( xmm5 ) * scalar;
1828  y[i+5UL] = sum( xmm6 ) * scalar;
1829  y[i+6UL] = sum( xmm7 ) * scalar;
1830  y[i+7UL] = sum( xmm8 ) * scalar;
1831  }
      // Blocks of four rows (at most seven rows are left at this point).
1832  for( ; (i+4UL) <= M; i+=4UL ) {
1833  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1834  for( size_t j=0UL; j<N; j+=IT::size ) {
1835  const IntrinsicType x1( x.get(j) );
1836  xmm1 = xmm1 + A.get(i ,j) * x1;
1837  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1838  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1839  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1840  }
1841  y[i ] = sum( xmm1 ) * scalar;
1842  y[i+1UL] = sum( xmm2 ) * scalar;
1843  y[i+2UL] = sum( xmm3 ) * scalar;
1844  y[i+3UL] = sum( xmm4 ) * scalar;
1845  }
      // Block of three rows (at most three rows are left at this point).
1846  for( ; (i+3UL) <= M; i+=3UL ) {
1847  IntrinsicType xmm1, xmm2, xmm3;
1848  for( size_t j=0UL; j<N; j+=IT::size ) {
1849  const IntrinsicType x1( x.get(j) );
1850  xmm1 = xmm1 + A.get(i ,j) * x1;
1851  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1852  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1853  }
1854  y[i ] = sum( xmm1 ) * scalar;
1855  y[i+1UL] = sum( xmm2 ) * scalar;
1856  y[i+2UL] = sum( xmm3 ) * scalar;
1857  }
      // Block of two rows.
1858  for( ; (i+2UL) <= M; i+=2UL ) {
1859  IntrinsicType xmm1, xmm2;
1860  for( size_t j=0UL; j<N; j+=IT::size ) {
1861  const IntrinsicType x1( x.get(j) );
1862  xmm1 = xmm1 + A.get(i ,j) * x1;
1863  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1864  }
1865  y[i ] = sum( xmm1 ) * scalar;
1866  y[i+1UL] = sum( xmm2 ) * scalar;
1867  }
      // Single remaining row, if any.
1868  if( i < M ) {
1869  IntrinsicType xmm1;
1870  for( size_t j=0UL; j<N; j+=IT::size ) {
1871  xmm1 = xmm1 + A.get(i,j) * x.get(j);
1872  }
1873  y[i] = sum( xmm1 ) * scalar;
1874  }
1875  }
1876  //**********************************************************************************************
1877 
1878  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS-path fallback for the scaled assignment: y = A * x * scalar.
// Chosen when UseDefaultKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf); it
// simply forwards to selectDefaultAssignKernel() with the same arguments.
1892  template< typename VT1 // Type of the left-hand side target vector
1893  , typename MT1 // Type of the left-hand side matrix operand
1894  , typename VT2 // Type of the right-hand side vector operand
1895  , typename ST2 > // Type of the scalar value
1896  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1897  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1898  {
1899  selectDefaultAssignKernel( y, A, x, scalar );
1900  }
1901  //**********************************************************************************************
1902 
1903  //**BLAS-based assignment to dense vectors (single precision)***********************************
1904 #if BLAZE_BLAS_MODE
1905 
// BLAS-based scaled assignment kernel for float operands: y = scalar * A * x.
// Chosen when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf).
//   y       target vector, written through y.data()
//   A       matrix operand, read through A.data() with leading dimension A.spacing()
//   x       vector operand, read through x.data()
//   scalar  scaling factor, passed to BLAS as alpha
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1918  template< typename VT1 // Type of the left-hand side target vector
1919  , typename MT1 // Type of the left-hand side matrix operand
1920  , typename VT2 // Type of the right-hand side vector operand
1921  , typename ST2 > // Type of the scalar value
1922  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1923  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1924  {
1925  using boost::numeric_cast;
1926 
1927  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1928  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1929  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1930 
1931  const int M ( numeric_cast<int>( A.rows() ) );
1932  const int N ( numeric_cast<int>( A.columns() ) );
1933  const int lda( numeric_cast<int>( A.spacing() ) );
1934 
      // gemv computes y := alpha*A*x + beta*y; alpha = scalar and beta = 0
      // overwrite y with scalar*A*x.
1935  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1936  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1937  }
1938 #endif
1939  //**********************************************************************************************
1940 
1941  //**BLAS-based assignment to dense vectors (double precision)***********************************
1942 #if BLAZE_BLAS_MODE
1943 
// BLAS-based scaled assignment kernel for double operands: y = scalar * A * x.
// Chosen when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf).
//   y       target vector, written through y.data()
//   A       matrix operand, read through A.data() with leading dimension A.spacing()
//   x       vector operand, read through x.data()
//   scalar  scaling factor, passed to BLAS as alpha
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1956  template< typename VT1 // Type of the left-hand side target vector
1957  , typename MT1 // Type of the left-hand side matrix operand
1958  , typename VT2 // Type of the right-hand side vector operand
1959  , typename ST2 > // Type of the scalar value
1960  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1961  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1962  {
1963  using boost::numeric_cast;
1964 
1965  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1966  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1967  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1968 
1969  const int M ( numeric_cast<int>( A.rows() ) );
1970  const int N ( numeric_cast<int>( A.columns() ) );
1971  const int lda( numeric_cast<int>( A.spacing() ) );
1972 
      // gemv computes y := alpha*A*x + beta*y; alpha = scalar and beta = 0
      // overwrite y with scalar*A*x.
1973  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1974  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
1975  }
1976 #endif
1977  //**********************************************************************************************
1978 
1979  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1980 #if BLAZE_BLAS_MODE
1981 
// BLAS-based scaled assignment kernel for complex<float> operands:
// y = scalar * A * x.
// Chosen when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> is satisfied
// (EnableIf); the scalar type ST2 does not take part in that check.
//   y       target vector, written through y.data()
//   A       matrix operand, read through A.data() with leading dimension A.spacing()
//   x       vector operand, read through x.data()
//   scalar  scaling factor, converted to complex<float> and passed as alpha
// boost::numeric_cast<int> throws if a dimension cannot be represented as int.
1994  template< typename VT1 // Type of the left-hand side target vector
1995  , typename MT1 // Type of the left-hand side matrix operand
1996  , typename VT2 // Type of the right-hand side vector operand
1997  , typename ST2 > // Type of the scalar value
1998  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1999  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2000  {
2001  using boost::numeric_cast;
2002 
      // Compile-time guard: all three element types must be complex<float>.
2003  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2004  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2005  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2006  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2007  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2008  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2009 
2010  const int M ( numeric_cast<int>( A.rows() ) );
2011  const int N ( numeric_cast<int>( A.columns() ) );
2012  const int lda( numeric_cast<int>( A.spacing() ) );
      // gemv computes y := alpha*A*x + beta*y; alpha = scalar and beta = 0
      // overwrite y with scalar*A*x.
2013  const complex<float> alpha( scalar );
2014  const complex<float> beta ( 0.0F, 0.0F );
2015 
2016  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2017  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2018  }
2019 #endif
2020  //**********************************************************************************************
2021 
2022  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2023 #if BLAZE_BLAS_MODE
2024 
2037  template< typename VT1 // Type of the left-hand side target vector
2038  , typename MT1 // Type of the left-hand side matrix operand
2039  , typename VT2 // Type of the right-hand side vector operand
2040  , typename ST2 > // Type of the scalar value
2041  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2042  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2043  {
2044  using boost::numeric_cast;
2045 
2046  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2048  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2049  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2051  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2052 
2053  const int M ( numeric_cast<int>( A.rows() ) );
2054  const int N ( numeric_cast<int>( A.columns() ) );
2055  const int lda( numeric_cast<int>( A.spacing() ) );
2056  const complex<double> alpha( scalar );
2057  const complex<double> beta ( 0.0, 0.0 );
2058 
2059  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2060  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2061  }
2062 #endif
2063  //**********************************************************************************************
2064 
2065  //**Assignment to sparse vectors****************************************************************
2076  template< typename VT1 > // Type of the target sparse vector
2077  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2078  {
2080 
2083  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2084 
2085  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2086 
2087  const ResultType tmp( rhs );
2088  assign( ~lhs, tmp );
2089  }
2090  //**********************************************************************************************
2091 
2092  //**Addition assignment to dense vectors********************************************************
2104  template< typename VT1 > // Type of the target dense vector
2105  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2106  {
2108 
2109  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2110 
2111  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2112  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2113 
2114  if( left.rows() == 0UL || left.columns() == 0UL ) {
2115  return;
2116  }
2117 
2118  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2119  RT x( right ); // Evaluation of the right-hand side dense vector operand
2120 
2121  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2122  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2123  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2124  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2125 
2126  if( ( IsComputation<MT>::value && !evaluate ) ||
2127  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2128  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2129  else
2130  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2131  }
2132  //**********************************************************************************************
2133 
2134  //**Default addition assignment to dense vectors************************************************
2148  template< typename VT1 // Type of the left-hand side target vector
2149  , typename MT1 // Type of the left-hand side matrix operand
2150  , typename VT2 // Type of the right-hand side vector operand
2151  , typename ST2 > // Type of the scalar value
2152  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2153  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2154  {
2155  y.addAssign( A * x * scalar );
2156  }
2157  //**********************************************************************************************
2158 
2159  //**Vectorized default addition assignment to dense vectors*************************************
2173  template< typename VT1 // Type of the left-hand side target vector
2174  , typename MT1 // Type of the left-hand side matrix operand
2175  , typename VT2 // Type of the right-hand side vector operand
2176  , typename ST2 > // Type of the scalar value
2177  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2178  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2179  {
2180  typedef IntrinsicTrait<ElementType> IT;
2181 
2182  const size_t M( A.rows() );
2183  const size_t N( A.columns() );
2184 
2185  size_t i( 0UL );
2186 
2187  for( ; (i+8UL) <= M; i+=8UL ) {
2188  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2189  for( size_t j=0UL; j<N; j+=IT::size ) {
2190  const IntrinsicType x1( x.get(j) );
2191  xmm1 = xmm1 + A.get(i ,j) * x1;
2192  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2193  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2194  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2195  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2196  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2197  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2198  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
2199  }
2200  y[i ] += sum( xmm1 ) * scalar;
2201  y[i+1UL] += sum( xmm2 ) * scalar;
2202  y[i+2UL] += sum( xmm3 ) * scalar;
2203  y[i+3UL] += sum( xmm4 ) * scalar;
2204  y[i+4UL] += sum( xmm5 ) * scalar;
2205  y[i+5UL] += sum( xmm6 ) * scalar;
2206  y[i+6UL] += sum( xmm7 ) * scalar;
2207  y[i+7UL] += sum( xmm8 ) * scalar;
2208  }
2209  for( ; (i+4UL) <= M; i+=4UL ) {
2210  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2211  for( size_t j=0UL; j<N; j+=IT::size ) {
2212  const IntrinsicType x1( x.get(j) );
2213  xmm1 = xmm1 + A.get(i ,j) * x1;
2214  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2215  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2216  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2217  }
2218  y[i ] += sum( xmm1 ) * scalar;
2219  y[i+1UL] += sum( xmm2 ) * scalar;
2220  y[i+2UL] += sum( xmm3 ) * scalar;
2221  y[i+3UL] += sum( xmm4 ) * scalar;
2222  }
2223  for( ; (i+3UL) <= M; i+=3UL ) {
2224  IntrinsicType xmm1, xmm2, xmm3;
2225  for( size_t j=0UL; j<N; j+=IT::size ) {
2226  const IntrinsicType x1( x.get(j) );
2227  xmm1 = xmm1 + A.get(i ,j) * x1;
2228  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2229  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2230  }
2231  y[i ] += sum( xmm1 ) * scalar;
2232  y[i+1UL] += sum( xmm2 ) * scalar;
2233  y[i+2UL] += sum( xmm3 ) * scalar;
2234  }
2235  for( ; (i+2UL) <= M; i+=2UL ) {
2236  IntrinsicType xmm1, xmm2;
2237  for( size_t j=0UL; j<N; j+=IT::size ) {
2238  const IntrinsicType x1( x.get(j) );
2239  xmm1 = xmm1 + A.get(i ,j) * x1;
2240  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2241  }
2242  y[i ] += sum( xmm1 ) * scalar;
2243  y[i+1UL] += sum( xmm2 ) * scalar;
2244  }
2245  if( i < M ) {
2246  IntrinsicType xmm1;
2247  for( size_t j=0UL; j<N; j+=IT::size ) {
2248  xmm1 = xmm1 + A.get(i,j) * x.get(j);
2249  }
2250  y[i] += sum( xmm1 ) * scalar;
2251  }
2252  }
2253  //**********************************************************************************************
2254 
2255  //**BLAS-based addition assignment to dense vectors (default)***********************************
2269  template< typename VT1 // Type of the left-hand side target vector
2270  , typename MT1 // Type of the left-hand side matrix operand
2271  , typename VT2 // Type of the right-hand side vector operand
2272  , typename ST2 > // Type of the scalar value
2273  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2274  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2275  {
2276  selectDefaultAddAssignKernel( y, A, x, scalar );
2277  }
2278  //**********************************************************************************************
2279 
2280  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2281 #if BLAZE_BLAS_MODE
2282 
2295  template< typename VT1 // Type of the left-hand side target vector
2296  , typename MT1 // Type of the left-hand side matrix operand
2297  , typename VT2 // Type of the right-hand side vector operand
2298  , typename ST2 > // Type of the scalar value
2299  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2300  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2301  {
2302  using boost::numeric_cast;
2303 
2304  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2305  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2306  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2307 
2308  const int M ( numeric_cast<int>( A.rows() ) );
2309  const int N ( numeric_cast<int>( A.columns() ) );
2310  const int lda( numeric_cast<int>( A.spacing() ) );
2311 
2312  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2313  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2314  }
2315 #endif
2316  //**********************************************************************************************
2317 
2318  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2319 #if BLAZE_BLAS_MODE
2320 
2333  template< typename VT1 // Type of the left-hand side target vector
2334  , typename MT1 // Type of the left-hand side matrix operand
2335  , typename VT2 // Type of the right-hand side vector operand
2336  , typename ST2 > // Type of the scalar value
2337  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2338  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2339  {
2340  using boost::numeric_cast;
2341 
2342  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2343  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2344  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2345 
2346  const int M ( numeric_cast<int>( A.rows() ) );
2347  const int N ( numeric_cast<int>( A.columns() ) );
2348  const int lda( numeric_cast<int>( A.spacing() ) );
2349 
2350  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2351  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2352  }
2353 #endif
2354  //**********************************************************************************************
2355 
2356  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2357 #if BLAZE_BLAS_MODE
2358 
2371  template< typename VT1 // Type of the left-hand side target vector
2372  , typename MT1 // Type of the left-hand side matrix operand
2373  , typename VT2 // Type of the right-hand side vector operand
2374  , typename ST2 > // Type of the scalar value
2375  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2376  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2377  {
2378  using boost::numeric_cast;
2379 
2380  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2381  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2382  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2383  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2384  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2385  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2386 
2387  const int M ( numeric_cast<int>( A.rows() ) );
2388  const int N ( numeric_cast<int>( A.columns() ) );
2389  const int lda( numeric_cast<int>( A.spacing() ) );
2390  const complex<float> alpha( scalar );
2391  const complex<float> beta ( 1.0F, 0.0F );
2392 
2393  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2394  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2395  }
2396 #endif
2397  //**********************************************************************************************
2398 
2399  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2400 #if BLAZE_BLAS_MODE
2401 
2414  template< typename VT1 // Type of the left-hand side target vector
2415  , typename MT1 // Type of the left-hand side matrix operand
2416  , typename VT2 // Type of the right-hand side vector operand
2417  , typename ST2 > // Type of the scalar value
2418  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2419  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2420  {
2421  using boost::numeric_cast;
2422 
2423  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2424  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2425  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2426  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2427  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2428  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2429 
2430  const int M ( numeric_cast<int>( A.rows() ) );
2431  const int N ( numeric_cast<int>( A.columns() ) );
2432  const int lda( numeric_cast<int>( A.spacing() ) );
2433  const complex<double> alpha( scalar );
2434  const complex<double> beta ( 1.0, 0.0 );
2435 
2436  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2437  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2438  }
2439 #endif
2440  //**********************************************************************************************
2441 
2442  //**Addition assignment to sparse vectors*******************************************************
2443  // No special implementation for the addition assignment to sparse vectors.
2444  //**********************************************************************************************
2445 
2446  //**Subtraction assignment to dense vectors*****************************************************
2458  template< typename VT1 > // Type of the target dense vector
2459  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2460  {
2462 
2463  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2464 
2465  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2466  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2467 
2468  if( left.rows() == 0UL || left.columns() == 0UL ) {
2469  return;
2470  }
2471 
2472  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2473  RT x( right ); // Evaluation of the right-hand side dense vector operand
2474 
2475  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2476  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2477  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2478  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2479 
2480  if( ( IsComputation<MT>::value && !evaluate ) ||
2481  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2482  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2483  else
2484  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2485  }
2486  //**********************************************************************************************
2487 
2488  //**Default subtraction assignment to dense vectors*********************************************
2502  template< typename VT1 // Type of the left-hand side target vector
2503  , typename MT1 // Type of the left-hand side matrix operand
2504  , typename VT2 // Type of the right-hand side vector operand
2505  , typename ST2 > // Type of the scalar value
2506  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2507  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2508  {
2509  y.subAssign( A * x * scalar );
2510  }
2511  //**********************************************************************************************
2512 
2513  //**Vectorized default subtraction assignment to dense vectors**********************************
2527  template< typename VT1 // Type of the left-hand side target vector
2528  , typename MT1 // Type of the left-hand side matrix operand
2529  , typename VT2 // Type of the right-hand side vector operand
2530  , typename ST2 > // Type of the scalar value
2531  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2532  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2533  {
2534  typedef IntrinsicTrait<ElementType> IT;
2535 
2536  const size_t M( A.rows() );
2537  const size_t N( A.columns() );
2538 
2539  size_t i( 0UL );
2540 
2541  for( ; (i+8UL) <= M; i+=8UL ) {
2542  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2543  for( size_t j=0UL; j<N; j+=IT::size ) {
2544  const IntrinsicType x1( x.get(j) );
2545  xmm1 = xmm1 + A.get(i ,j) * x1;
2546  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2547  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2548  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2549  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2550  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2551  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2552  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
2553  }
2554  y[i ] -= sum( xmm1 ) * scalar;
2555  y[i+1UL] -= sum( xmm2 ) * scalar;
2556  y[i+2UL] -= sum( xmm3 ) * scalar;
2557  y[i+3UL] -= sum( xmm4 ) * scalar;
2558  y[i+4UL] -= sum( xmm5 ) * scalar;
2559  y[i+5UL] -= sum( xmm6 ) * scalar;
2560  y[i+6UL] -= sum( xmm7 ) * scalar;
2561  y[i+7UL] -= sum( xmm8 ) * scalar;
2562  }
2563  for( ; (i+4UL) <= M; i+=4UL ) {
2564  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2565  for( size_t j=0UL; j<N; j+=IT::size ) {
2566  const IntrinsicType x1( x.get(j) );
2567  xmm1 = xmm1 + A.get(i ,j) * x1;
2568  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2569  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2570  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2571  }
2572  y[i ] -= sum( xmm1 ) * scalar;
2573  y[i+1UL] -= sum( xmm2 ) * scalar;
2574  y[i+2UL] -= sum( xmm3 ) * scalar;
2575  y[i+3UL] -= sum( xmm4 ) * scalar;
2576  }
2577  for( ; (i+3UL) <= M; i+=3UL ) {
2578  IntrinsicType xmm1, xmm2, xmm3;
2579  for( size_t j=0UL; j<N; j+=IT::size ) {
2580  const IntrinsicType x1( x.get(j) );
2581  xmm1 = xmm1 + A.get(i ,j) * x1;
2582  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2583  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2584  }
2585  y[i ] -= sum( xmm1 ) * scalar;
2586  y[i+1UL] -= sum( xmm2 ) * scalar;
2587  y[i+2UL] -= sum( xmm3 ) * scalar;
2588  }
2589  for( ; (i+2UL) <= M; i+=2UL ) {
2590  IntrinsicType xmm1, xmm2;
2591  for( size_t j=0UL; j<N; j+=IT::size ) {
2592  const IntrinsicType x1( x.get(j) );
2593  xmm1 = xmm1 + A.get(i ,j) * x1;
2594  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2595  }
2596  y[i ] -= sum( xmm1 ) * scalar;
2597  y[i+1UL] -= sum( xmm2 ) * scalar;
2598  }
2599  if( i < M ) {
2600  IntrinsicType xmm1;
2601  for( size_t j=0UL; j<N; j+=IT::size ) {
2602  xmm1 = xmm1 + A.get(i,j) * x.get(j);
2603  }
2604  y[i] -= sum( xmm1 ) * scalar;
2605  }
2606  }
2607  //**********************************************************************************************
2608 
2609  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2623  template< typename VT1 // Type of the left-hand side target vector
2624  , typename MT1 // Type of the left-hand side matrix operand
2625  , typename VT2 // Type of the right-hand side vector operand
2626  , typename ST2 > // Type of the scalar value
2627  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2628  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2629  {
2630  selectDefaultSubAssignKernel( y, A, x, scalar );
2631  }
2632  //**********************************************************************************************
2633 
2634  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2635 #if BLAZE_BLAS_MODE
2636 
2649  template< typename VT1 // Type of the left-hand side target vector
2650  , typename MT1 // Type of the left-hand side matrix operand
2651  , typename VT2 // Type of the right-hand side vector operand
2652  , typename ST2 > // Type of the scalar value
2653  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2654  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2655  {
2656  using boost::numeric_cast;
2657 
2658  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2659  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2660  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2661 
2662  const int M ( numeric_cast<int>( A.rows() ) );
2663  const int N ( numeric_cast<int>( A.columns() ) );
2664  const int lda( numeric_cast<int>( A.spacing() ) );
2665 
2666  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2667  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2668  }
2669 #endif
2670  //**********************************************************************************************
2671 
2672  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2673 #if BLAZE_BLAS_MODE
2674 
2687  template< typename VT1 // Type of the left-hand side target vector
2688  , typename MT1 // Type of the left-hand side matrix operand
2689  , typename VT2 // Type of the right-hand side vector operand
2690  , typename ST2 > // Type of the scalar value
2691  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2692  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2693  {
2694  using boost::numeric_cast;
2695 
2696  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2697  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2699 
2700  const int M ( numeric_cast<int>( A.rows() ) );
2701  const int N ( numeric_cast<int>( A.columns() ) );
2702  const int lda( numeric_cast<int>( A.spacing() ) );
2703 
2704  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2705  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2706  }
2707 #endif
2708  //**********************************************************************************************
2709 
2710  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2711 #if BLAZE_BLAS_MODE
2712 
2725  template< typename VT1 // Type of the left-hand side target vector
2726  , typename MT1 // Type of the left-hand side matrix operand
2727  , typename VT2 // Type of the right-hand side vector operand
2728  , typename ST2 > // Type of the scalar value
2729  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2730  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2731  {
2732  using boost::numeric_cast;
2733 
2734  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2735  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2736  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2737  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2738  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2739  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2740 
2741  const int M ( numeric_cast<int>( A.rows() ) );
2742  const int N ( numeric_cast<int>( A.columns() ) );
2743  const int lda( numeric_cast<int>( A.spacing() ) );
2744  const complex<float> alpha( -scalar );
2745  const complex<float> beta ( 1.0F, 0.0F );
2746 
2747  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2748  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2749  }
2750 #endif
2751  //**********************************************************************************************
2752 
2753  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2754 #if BLAZE_BLAS_MODE
2755 
2768  template< typename VT1 // Type of the left-hand side target vector
2769  , typename MT1 // Type of the left-hand side matrix operand
2770  , typename VT2 // Type of the right-hand side vector operand
2771  , typename ST2 > // Type of the scalar value
2772  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2773  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2774  {
2775  using boost::numeric_cast;
2776 
2777  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2778  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2779  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2780  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2781  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2782  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2783 
2784  const int M ( numeric_cast<int>( A.rows() ) );
2785  const int N ( numeric_cast<int>( A.columns() ) );
2786  const int lda( numeric_cast<int>( A.spacing() ) );
2787  const complex<double> alpha( -scalar );
2788  const complex<double> beta ( 1.0, 0.0 );
2789 
2790  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2791  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2792  }
2793 #endif
2794  //**********************************************************************************************
2795 
2796  //**Subtraction assignment to sparse vectors****************************************************
2797  // No special implementation for the subtraction assignment to sparse vectors.
2798  //**********************************************************************************************
2799 
2800  //**Multiplication assignment to dense vectors**************************************************
2812  template< typename VT1 > // Type of the target dense vector
2813  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2814  {
2816 
2819  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2820 
2821  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2822 
2823  const ResultType tmp( rhs );
2824  multAssign( ~lhs, tmp );
2825  }
2826  //**********************************************************************************************
2827 
2828  //**Multiplication assignment to sparse vectors*************************************************
2829  // No special implementation for the multiplication assignment to sparse vectors.
2830  //**********************************************************************************************
2831 
2832  //**Compile time checks*************************************************************************
2841  //**********************************************************************************************
2842 };
2844 //*************************************************************************************************
2845 
2846 
2847 
2848 
2849 //=================================================================================================
2850 //
2851 // GLOBAL BINARY ARITHMETIC OPERATORS
2852 //
2853 //=================================================================================================
2854 
2855 //*************************************************************************************************
2885 template< typename T1 // Type of the left-hand side dense matrix
2886  , typename T2 > // Type of the right-hand side dense vector
2887 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2889 {
2891 
2892  if( (~mat).columns() != (~vec).size() )
2893  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2894 
2895  return DMatDVecMultExpr<T1,T2>( ~mat, ~vec );
2896 }
2897 //*************************************************************************************************
2898 
2899 
2900 
2901 
2902 //=================================================================================================
2903 //
2904 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
2905 //
2906 //=================================================================================================
2907 
2908 //*************************************************************************************************
2921 template< typename T1 // Type of the left-hand side dense matrix
2922  , bool SO // Storage order of the left-hand side dense matrix
2923  , typename T2 > // Type of the right-hand side dense vector
2924 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2926 {
2928 
2929  return (~mat).leftOperand() * ( (~mat).rightOperand() * vec );
2930 }
2931 //*************************************************************************************************
2932 
2933 } // namespace blaze
2934 
2935 #endif