All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
48 #include <blaze/system/BLAS.h>
50 #include <blaze/util/Assert.h>
51 #include <blaze/util/Complex.h>
56 #include <blaze/util/DisableIf.h>
57 #include <blaze/util/EnableIf.h>
59 #include <blaze/util/SelectType.h>
60 #include <blaze/util/Types.h>
66 
67 
68 namespace blaze {
69 
70 //=================================================================================================
71 //
72 // CLASS TDVECTDMATMULTEXPR
73 //
74 //=================================================================================================
75 
76 //*************************************************************************************************
83 template< typename VT // Type of the left-hand side dense vector
84  , typename MT > // Type of the right-hand side dense matrix
85 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
86  , private Expression
87  , private Computation
88 {
89  private:
90  //**Type definitions****************************************************************************
// Private shorthand aliases for types nested in the two operand types VT and MT.
91  typedef typename VT::ResultType VRT;  // Result type of the left-hand side vector operand
92  typedef typename MT::ResultType MRT;  // Result type of the right-hand side matrix operand
93  typedef typename VRT::ElementType VET;  // Element type of the vector result type
94  typedef typename MRT::ElementType MET;  // Element type of the matrix result type
95  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand
96  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand
97  //**********************************************************************************************
98 
99  //**********************************************************************************************
// Compile-time flag 'evaluate', consulted by the dense (add/sub) assignment functions when
// choosing between the default and the BLAS kernels. Only the first part of its condition is
// visible here: MT is a computation and MT is not vectorizable.
// NOTE(review): the expression ends on a dangling '&&' and the closing '};' is absent -- the
// remainder of the condition appears to be missing from this listing and must be restored
// from the original header before this can compile.
101  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
103  //**********************************************************************************************
104 
105  //**********************************************************************************************
107 
108 
111  template< typename T1, typename T2, typename T3 >
112  struct UseSinglePrecisionKernel {
113  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
117  };
119  //**********************************************************************************************
120 
121  //**********************************************************************************************
123 
124 
127  template< typename T1, typename T2, typename T3 >
128  struct UseDoublePrecisionKernel {
129  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
130  IsDouble<typename T1::ElementType>::value &&
131  IsDouble<typename T2::ElementType>::value &&
132  IsDouble<typename T3::ElementType>::value };
133  };
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
139 
140 
143  template< typename T1, typename T2, typename T3 >
144  struct UseSinglePrecisionComplexKernel {
145  typedef complex<float> Type;
146  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
147  IsSame<typename T1::ElementType,Type>::value &&
148  IsSame<typename T2::ElementType,Type>::value &&
149  IsSame<typename T3::ElementType,Type>::value };
150  };
152  //**********************************************************************************************
153 
154  //**********************************************************************************************
156 
157 
160  template< typename T1, typename T2, typename T3 >
161  struct UseDoublePrecisionComplexKernel {
162  typedef complex<double> Type;
163  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
164  IsSame<typename T1::ElementType,Type>::value &&
165  IsSame<typename T2::ElementType,Type>::value &&
166  IsSame<typename T3::ElementType,Type>::value };
167  };
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
174 
176  template< typename T1, typename T2, typename T3 >
177  struct UseDefaultKernel {
178  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
179  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
180  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
181  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
182  };
184  //**********************************************************************************************
185 
186  //**********************************************************************************************
188 
189 
192  template< typename T1, typename T2, typename T3 >
193  struct UseVectorizedDefaultKernel {
194  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
195  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
196  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
197  IntrinsicTrait<typename T1::ElementType>::addition &&
198  IntrinsicTrait<typename T1::ElementType>::multiplication };
199  };
201  //**********************************************************************************************
202 
203  public:
204  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): ResultType (and the RT / IntrinsicType names used further below) are not
// declared in the visible part of this listing -- presumably their typedefs were dropped
// together with other documented lines; confirm against the original header.
207  typedef typename ResultType::TransposeType TransposeType;  // Transpose type nested in ResultType
208  typedef typename ResultType::ElementType ElementType;  // Element type nested in ResultType
210  typedef const ElementType ReturnType;  // Return type of operator[] (a const value)
211  typedef const ResultType CompositeType;  // Composite type: a const ResultType value
212 
// Member type for the vector operand: 'const VT' or 'const VT&', chosen by IsExpression<VT>
// (SelectType presumably yields the first alternative when the condition is true).
214  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
215 
// Member type for the matrix operand: 'const MT' or 'const MT&', chosen by IsExpression<MT>.
217  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
218 
// Type used to evaluate the vector operand inside the assignment functions:
// 'const VRT' or VCT, chosen by IsComputation<VT>.
220  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
221 
224  //**********************************************************************************************
225 
226  //**Compilation flags***************************************************************************
// Compilation flag: the expression itself reports that it is not vectorizable (value 0).
228  enum { vectorizable = 0 };
229  //**********************************************************************************************
230 
231  //**Constructor*********************************************************************************
237  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
238  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
239  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
240  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
241  {
242  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
243  }
244  //**********************************************************************************************
245 
246  //**Subscript operator**************************************************************************
252  inline ReturnType operator[]( size_t index ) const {
253  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
254 
255  ElementType res;
256 
257  if( mat_.rows() != 0UL ) {
258  res = vec_[0UL] * mat_(0UL,index);
259  for( size_t j=1UL; j<end_; j+=2UL ) {
260  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
261  }
262  if( end_ < mat_.rows() ) {
263  res += vec_[end_] * mat_(end_,index);
264  }
265  }
266  else {
267  reset( res );
268  }
269 
270  return res;
271  }
272  //**********************************************************************************************
273 
274  //**Size function*******************************************************************************
279  inline size_t size() const {
280  return mat_.columns();
281  }
282  //**********************************************************************************************
283 
284  //**Left operand access*************************************************************************
// Returns the left-hand side dense vector operand (vec_) of the multiplication expression.
289  inline LeftOperand leftOperand() const {
290  return vec_;
291  }
292  //**********************************************************************************************
293 
294  //**Right operand access************************************************************************
// Returns the right-hand side dense matrix operand (mat_) of the multiplication expression.
299  inline RightOperand rightOperand() const {
300  return mat_;
301  }
302  //**********************************************************************************************
303 
304  //**********************************************************************************************
310  template< typename T >
311  inline bool canAlias( const T* alias ) const {
312  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
313  }
314  //**********************************************************************************************
315 
316  //**********************************************************************************************
322  template< typename T >
323  inline bool isAliased( const T* alias ) const {
324  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
325  }
326  //**********************************************************************************************
327 
328  private:
329  //**Member variables****************************************************************************
// End of the unrolled calculation loop in operator[]; computed once in the constructor
// from the number of rows of the matrix operand.
332  const size_t end_;
333  //**********************************************************************************************
334 
335  //**Assignment to dense vectors*****************************************************************
348  template< typename VT1 > // Type of the target dense vector
349  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
350  {
352 
353  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
354 
355  if( rhs.mat_.rows() == 0UL ) {
356  reset( ~lhs );
357  return;
358  }
359  else if( rhs.mat_.columns() == 0UL ) {
360  return;
361  }
362 
363  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
364  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
365 
366  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
367  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
368  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
369  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
370 
371  if( ( IsComputation<MT>::value && !evaluate ) ||
372  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
373  TDVecTDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
374  else
375  TDVecTDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
376  }
378  //**********************************************************************************************
379 
380  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) assignment kernel: y = x * A.
//
// y : target vector, x : vector operand, A : matrix operand.
// This overload is guarded by DisableIf on UseVectorizedDefaultKernel, i.e. it is presumably
// the one chosen when the vectorized kernel does not apply. It forwards the product
// expression 'x * A' to the assign() member of the target vector.
394  template< typename VT1 // Type of the left-hand side target vector
395  , typename VT2 // Type of the left-hand side vector operand
396  , typename MT1 > // Type of the right-hand side matrix operand
397  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
398  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
399  {
400  y.assign( x * A );
401  }
403  //**********************************************************************************************
404 
405  //**Vectorized default assignment to dense vectors**********************************************
419  template< typename VT1 // Type of the left-hand side target vector
420  , typename VT2 // Type of the left-hand side vector operand
421  , typename MT1 > // Type of the right-hand side matrix operand
422  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
423  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
424  {
425  typedef IntrinsicTrait<ElementType> IT;
426 
427  const size_t M( A.rows() );
428  const size_t N( A.columns() );
429 
430  size_t j( 0UL );
431 
432  for( ; (j+8UL) <= N; j+=8UL ) {
433  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
434  for( size_t i=0UL; i<M; i+=IT::size ) {
435  const IntrinsicType x1( x.get(i) );
436  xmm1 = xmm1 + x1 * A.get(i,j );
437  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
438  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
439  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
440  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
441  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
442  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
443  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
444  }
445  y[j ] = sum( xmm1 );
446  y[j+1UL] = sum( xmm2 );
447  y[j+2UL] = sum( xmm3 );
448  y[j+3UL] = sum( xmm4 );
449  y[j+4UL] = sum( xmm5 );
450  y[j+5UL] = sum( xmm6 );
451  y[j+6UL] = sum( xmm7 );
452  y[j+7UL] = sum( xmm8 );
453  }
454  for( ; (j+4UL) <= N; j+=4UL ) {
455  IntrinsicType xmm1, xmm2, xmm3, xmm4;
456  for( size_t i=0UL; i<M; i+=IT::size ) {
457  const IntrinsicType x1( x.get(i) );
458  xmm1 = xmm1 + x1 * A.get(i,j );
459  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
460  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
461  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
462  }
463  y[j ] = sum( xmm1 );
464  y[j+1UL] = sum( xmm2 );
465  y[j+2UL] = sum( xmm3 );
466  y[j+3UL] = sum( xmm4 );
467  }
468  for( ; (j+3UL) <= N; j+=3UL ) {
469  IntrinsicType xmm1, xmm2, xmm3;
470  for( size_t i=0UL; i<M; i+=IT::size ) {
471  const IntrinsicType x1( x.get(i) );
472  xmm1 = xmm1 + x1 * A.get(i,j );
473  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
474  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
475  }
476  y[j ] = sum( xmm1 );
477  y[j+1UL] = sum( xmm2 );
478  y[j+2UL] = sum( xmm3 );
479  }
480  for( ; (j+2UL) <= N; j+=2UL ) {
481  IntrinsicType xmm1, xmm2;
482  for( size_t i=0UL; i<M; i+=IT::size ) {
483  const IntrinsicType x1( x.get(i) );
484  xmm1 = xmm1 + x1 * A.get(i,j );
485  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
486  }
487  y[j ] = sum( xmm1 );
488  y[j+1UL] = sum( xmm2 );
489  }
490  if( j < N ) {
491  IntrinsicType xmm1;
492  for( size_t i=0UL; i<M; i+=IT::size ) {
493  xmm1 = xmm1 + A.get(i,j) * x.get(i);
494  }
495  y[j] = sum( xmm1 );
496  }
497  }
499  //**********************************************************************************************
500 
501  //**BLAS-based assignment to dense vectors (default)********************************************
515  template< typename VT1 // Type of the left-hand side target vector
516  , typename VT2 // Type of the left-hand side vector operand
517  , typename MT1 > // Type of the right-hand side matrix operand
518  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
519  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
520  {
521  selectDefaultAssignKernel( y, x, A );
522  }
524  //**********************************************************************************************
525 
526  //**BLAS-based assignment to dense vectors (single precision)***********************************
527 #if BLAZE_BLAS_MODE
528 
541  template< typename VT1 // Type of the left-hand side target vector
542  , typename VT2 // Type of the left-hand side vector operand
543  , typename MT1 > // Type of the right-hand side matrix operand
544  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
545  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
546  {
547  using boost::numeric_cast;
548 
549  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
550  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
551  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
552 
553  const int M ( numeric_cast<int>( A.rows() ) );
554  const int N ( numeric_cast<int>( A.columns() ) );
555  const int lda( numeric_cast<int>( A.spacing() ) );
556 
557  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
558  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
559  }
561 #endif
562  //**********************************************************************************************
563 
564  //**BLAS-based assignment to dense vectors (double precision)***********************************
565 #if BLAZE_BLAS_MODE
566 
579  template< typename VT1 // Type of the left-hand side target vector
580  , typename VT2 // Type of the left-hand side vector operand
581  , typename MT1 > // Type of the right-hand side matrix operand
582  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
583  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
584  {
585  using boost::numeric_cast;
586 
587  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
588  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
590 
591  const int M ( numeric_cast<int>( A.rows() ) );
592  const int N ( numeric_cast<int>( A.columns() ) );
593  const int lda( numeric_cast<int>( A.spacing() ) );
594 
595  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
596  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
597  }
599 #endif
600  //**********************************************************************************************
601 
602  //**BLAS-based assignment to dense vectors (single precision complex)***************************
603 #if BLAZE_BLAS_MODE
604 
617  template< typename VT1 // Type of the left-hand side target vector
618  , typename VT2 // Type of the left-hand side vector operand
619  , typename MT1 > // Type of the right-hand side matrix operand
620  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
621  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
622  {
623  using boost::numeric_cast;
624 
625  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
626  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
627  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
628  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
629  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
631 
632  const int M ( numeric_cast<int>( A.rows() ) );
633  const int N ( numeric_cast<int>( A.columns() ) );
634  const int lda( numeric_cast<int>( A.spacing() ) );
635  const complex<float> alpha( 1.0F, 0.0F );
636  const complex<float> beta ( 0.0F, 0.0F );
637 
638  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
639  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
640  }
642 #endif
643  //**********************************************************************************************
644 
645  //**BLAS-based assignment to dense vectors (double precision complex)***************************
646 #if BLAZE_BLAS_MODE
647 
660  template< typename VT1 // Type of the left-hand side target vector
661  , typename VT2 // Type of the left-hand side vector operand
662  , typename MT1 > // Type of the right-hand side matrix operand
663  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
664  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
665  {
666  using boost::numeric_cast;
667 
668  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
669  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
670  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
672  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
674 
675  const int M ( numeric_cast<int>( A.rows() ) );
676  const int N ( numeric_cast<int>( A.columns() ) );
677  const int lda( numeric_cast<int>( A.spacing() ) );
678  const complex<double> alpha( 1.0, 0.0 );
679  const complex<double> beta ( 0.0, 0.0 );
680 
681  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
682  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
683  }
685 #endif
686  //**********************************************************************************************
687 
688  //**Assignment to sparse vectors****************************************************************
701  template< typename VT1 > // Type of the target sparse vector
702  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
703  {
705 
708  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
709 
710  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
711 
712  const ResultType tmp( rhs );
713  assign( ~lhs, tmp );
714  }
716  //**********************************************************************************************
717 
718  //**Addition assignment to dense vectors********************************************************
731  template< typename VT1 > // Type of the target dense vector
732  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
733  {
735 
736  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
737 
738  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
739  return;
740  }
741 
742  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
743  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
744 
745  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
746  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
747  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
748  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
749 
750  if( ( IsComputation<MT>::value && !evaluate ) ||
751  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
752  TDVecTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
753  else
754  TDVecTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
755  }
757  //**********************************************************************************************
758 
759  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel: y += x * A.
//
// y : target vector, x : vector operand, A : matrix operand.
// This overload is guarded by DisableIf on UseVectorizedDefaultKernel, i.e. it is presumably
// the one chosen when the vectorized kernel does not apply. It forwards the product
// expression 'x * A' to the addAssign() member of the target vector.
773  template< typename VT1 // Type of the left-hand side target vector
774  , typename VT2 // Type of the left-hand side vector operand
775  , typename MT1 > // Type of the right-hand side matrix operand
776  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
777  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
778  {
779  y.addAssign( x * A );
780  }
782  //**********************************************************************************************
783 
784  //**Vectorized default addition assignment to dense vectors*************************************
798  template< typename VT1 // Type of the left-hand side target vector
799  , typename VT2 // Type of the left-hand side vector operand
800  , typename MT1 > // Type of the right-hand side matrix operand
801  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
802  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
803  {
804  typedef IntrinsicTrait<ElementType> IT;
805 
806  const size_t M( A.rows() );
807  const size_t N( A.columns() );
808 
809  size_t j( 0UL );
810 
811  for( ; (j+8UL) <= N; j+=8UL ) {
812  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
813  for( size_t i=0UL; i<M; i+=IT::size ) {
814  const IntrinsicType x1( x.get(i) );
815  xmm1 = xmm1 + x1 * A.get(i,j );
816  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
817  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
818  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
819  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
820  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
821  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
822  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
823  }
824  y[j ] += sum( xmm1 );
825  y[j+1UL] += sum( xmm2 );
826  y[j+2UL] += sum( xmm3 );
827  y[j+3UL] += sum( xmm4 );
828  y[j+4UL] += sum( xmm5 );
829  y[j+5UL] += sum( xmm6 );
830  y[j+6UL] += sum( xmm7 );
831  y[j+7UL] += sum( xmm8 );
832  }
833  for( ; (j+4UL) <= N; j+=4UL ) {
834  IntrinsicType xmm1, xmm2, xmm3, xmm4;
835  for( size_t i=0UL; i<M; i+=IT::size ) {
836  const IntrinsicType x1( x.get(i) );
837  xmm1 = xmm1 + x1 * A.get(i,j );
838  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
839  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
840  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
841  }
842  y[j ] += sum( xmm1 );
843  y[j+1UL] += sum( xmm2 );
844  y[j+2UL] += sum( xmm3 );
845  y[j+3UL] += sum( xmm4 );
846  }
847  for( ; (j+3UL) <= N; j+=3UL ) {
848  IntrinsicType xmm1, xmm2, xmm3;
849  for( size_t i=0UL; i<M; i+=IT::size ) {
850  const IntrinsicType x1( x.get(i) );
851  xmm1 = xmm1 + x1 * A.get(i,j );
852  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
853  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
854  }
855  y[j ] += sum( xmm1 );
856  y[j+1UL] += sum( xmm2 );
857  y[j+2UL] += sum( xmm3 );
858  }
859  for( ; (j+2UL) <= N; j+=2UL ) {
860  IntrinsicType xmm1, xmm2;
861  for( size_t i=0UL; i<M; i+=IT::size ) {
862  const IntrinsicType x1( x.get(i) );
863  xmm1 = xmm1 + x1 * A.get(i,j );
864  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
865  }
866  y[j ] += sum( xmm1 );
867  y[j+1UL] += sum( xmm2 );
868  }
869  if( j < N ) {
870  IntrinsicType xmm1;
871  for( size_t i=0UL; i<M; i+=IT::size ) {
872  xmm1 = xmm1 + A.get(i,j) * x.get(i);
873  }
874  y[j] += sum( xmm1 );
875  }
876  }
878  //**********************************************************************************************
879 
880  //**BLAS-based addition assignment to dense vectors (default)***********************************
894  template< typename VT1 // Type of the left-hand side target vector
895  , typename VT2 // Type of the left-hand side vector operand
896  , typename MT1 > // Type of the right-hand side matrix operand
897  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
898  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
899  {
900  selectDefaultAddAssignKernel( y, x, A );
901  }
903  //**********************************************************************************************
904 
905  //**BLAS-based addition assignment to dense vectors (single precision)**************************
906 #if BLAZE_BLAS_MODE
907 
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename VT2 // Type of the left-hand side vector operand
922  , typename MT1 > // Type of the right-hand side matrix operand
923  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
924  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
925  {
926  using boost::numeric_cast;
927 
928  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
929  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
930  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
931 
932  const int M ( numeric_cast<int>( A.rows() ) );
933  const int N ( numeric_cast<int>( A.columns() ) );
934  const int lda( numeric_cast<int>( A.spacing() ) );
935 
936  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
937  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
938  }
940 #endif
941  //**********************************************************************************************
942 
943  //**BLAS-based addition assignment to dense vectors (double precision)**************************
944 #if BLAZE_BLAS_MODE
945 
958  template< typename VT1 // Type of the left-hand side target vector
959  , typename VT2 // Type of the left-hand side vector operand
960  , typename MT1 > // Type of the right-hand side matrix operand
961  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
962  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
963  {
964  using boost::numeric_cast;
965 
966  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
967  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
968  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
969 
970  const int M ( numeric_cast<int>( A.rows() ) );
971  const int N ( numeric_cast<int>( A.columns() ) );
972  const int lda( numeric_cast<int>( A.spacing() ) );
973 
974  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
975  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
976  }
978 #endif
979  //**********************************************************************************************
980 
981  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
982 #if BLAZE_BLAS_MODE
983 
996  template< typename VT1 // Type of the left-hand side target vector
997  , typename VT2 // Type of the left-hand side vector operand
998  , typename MT1 > // Type of the right-hand side matrix operand
999  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1000  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1001  {
1002  using boost::numeric_cast;
1003 
1004  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1005  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1006  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1007  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1008  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1009  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1010 
1011  const int M ( numeric_cast<int>( A.rows() ) );
1012  const int N ( numeric_cast<int>( A.columns() ) );
1013  const int lda( numeric_cast<int>( A.spacing() ) );
1014  const complex<float> alpha( 1.0F, 0.0F );
1015  const complex<float> beta ( 1.0F, 0.0F );
1016 
1017  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1018  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1019  }
1021 #endif
1022  //**********************************************************************************************
1023 
1024  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1025 #if BLAZE_BLAS_MODE
1026 
1039  template< typename VT1 // Type of the left-hand side target vector
1040  , typename VT2 // Type of the left-hand side vector operand
1041  , typename MT1 > // Type of the right-hand side matrix operand
1042  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1043  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1044  {
1045  using boost::numeric_cast;
1046 
1047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1048  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1049  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1051  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1052  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1053 
1054  const int M ( numeric_cast<int>( A.rows() ) );
1055  const int N ( numeric_cast<int>( A.columns() ) );
1056  const int lda( numeric_cast<int>( A.spacing() ) );
1057  const complex<double> alpha( 1.0, 0.0 );
1058  const complex<double> beta ( 1.0, 0.0 );
1059 
1060  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1061  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1062  }
1064 #endif
1065  //**********************************************************************************************
1066 
1067  //**Addition assignment to sparse vectors*******************************************************
1068  // No special implementation for the addition assignment to sparse vectors.
1069  //**********************************************************************************************
1070 
1071  //**Subtraction assignment to dense vectors*****************************************************
/*!\brief Subtraction assignment of a transpose dense vector-transpose dense matrix
//        multiplication to a transpose dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Evaluates both operands of the expression and dispatches to either the default or the
// BLAS-based subtraction kernel.
*/
1084  template< typename VT1 > // Type of the target dense vector
1085  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1086  {
1088 
1089  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1090 
      // A matrix operand without rows or without columns leaves the target untouched
1091  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1092  return;
1093  }
1094 
1095  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1096  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1097 
1098  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1099  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1100  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1101  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1102 
      // The default kernel is chosen when the matrix operand is a computation that is not
      // flagged for evaluation, or when the matrix has fewer elements than
      // TDVECTDMATMULT_THRESHOLD; every other case goes to the BLAS kernel selection.
1103  if( ( IsComputation<MT>::value && !evaluate ) ||
1104  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1105  TDVecTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1106  else
1107  TDVecTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1108  }
1110  //**********************************************************************************************
1111 
1112  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default (non-vectorized) subtraction assignment kernel
//        (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload takes part in overload resolution only if UseVectorizedDefaultKernel does not
// apply to the given types. It passes the expression \c x*A to the \c subAssign member of the
// target vector.
*/
1126  template< typename VT1 // Type of the left-hand side target vector
1127  , typename VT2 // Type of the left-hand side vector operand
1128  , typename MT1 > // Type of the right-hand side matrix operand
1129  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1130  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1131  {
1132  y.subAssign( x * A );
1133  }
1135  //**********************************************************************************************
1136 
1137  //**Vectorized default subtraction assignment to dense vectors**********************************
1151  template< typename VT1 // Type of the left-hand side target vector
1152  , typename VT2 // Type of the left-hand side vector operand
1153  , typename MT1 > // Type of the right-hand side matrix operand
1154  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1155  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1156  {
1157  typedef IntrinsicTrait<ElementType> IT;
1158 
1159  const size_t M( A.rows() );
1160  const size_t N( A.columns() );
1161 
1162  size_t j( 0UL );
1163 
1164  for( ; (j+8UL) <= N; j+=8UL ) {
1165  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1166  for( size_t i=0UL; i<M; i+=IT::size ) {
1167  const IntrinsicType x1( x.get(i) );
1168  xmm1 = xmm1 + x1 * A.get(i,j );
1169  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1170  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1171  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1172  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1173  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1174  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1175  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1176  }
1177  y[j ] -= sum( xmm1 );
1178  y[j+1UL] -= sum( xmm2 );
1179  y[j+2UL] -= sum( xmm3 );
1180  y[j+3UL] -= sum( xmm4 );
1181  y[j+4UL] -= sum( xmm5 );
1182  y[j+5UL] -= sum( xmm6 );
1183  y[j+6UL] -= sum( xmm7 );
1184  y[j+7UL] -= sum( xmm8 );
1185  }
1186  for( ; (j+4UL) <= N; j+=4UL ) {
1187  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1188  for( size_t i=0UL; i<M; i+=IT::size ) {
1189  const IntrinsicType x1( x.get(i) );
1190  xmm1 = xmm1 + x1 * A.get(i,j );
1191  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1192  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1193  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1194  }
1195  y[j ] -= sum( xmm1 );
1196  y[j+1UL] -= sum( xmm2 );
1197  y[j+2UL] -= sum( xmm3 );
1198  y[j+3UL] -= sum( xmm4 );
1199  }
1200  for( ; (j+3UL) <= N; j+=3UL ) {
1201  IntrinsicType xmm1, xmm2, xmm3;
1202  for( size_t i=0UL; i<M; i+=IT::size ) {
1203  const IntrinsicType x1( x.get(i) );
1204  xmm1 = xmm1 + x1 * A.get(i,j );
1205  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1206  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1207  }
1208  y[j ] -= sum( xmm1 );
1209  y[j+1UL] -= sum( xmm2 );
1210  y[j+2UL] -= sum( xmm3 );
1211  }
1212  for( ; (j+2UL) <= N; j+=2UL ) {
1213  IntrinsicType xmm1, xmm2;
1214  for( size_t i=0UL; i<M; i+=IT::size ) {
1215  const IntrinsicType x1( x.get(i) );
1216  xmm1 = xmm1 + x1 * A.get(i,j );
1217  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1218  }
1219  y[j ] -= sum( xmm1 );
1220  y[j+1UL] -= sum( xmm2 );
1221  }
1222  if( j < N ) {
1223  IntrinsicType xmm1;
1224  for( size_t i=0UL; i<M; i+=IT::size ) {
1225  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1226  }
1227  y[j] -= sum( xmm1 );
1228  }
1229  }
1231  //**********************************************************************************************
1232 
1233  //**BLAS-based subtraction assignment to dense vectors (default)********************************
/*!\brief Fallback of the BLAS-based subtraction assignment kernel selection.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is enabled via UseDefaultKernel and does nothing but forward the arguments to
// selectDefaultSubAssignKernel().
*/
1247  template< typename VT1 // Type of the left-hand side target vector
1248  , typename VT2 // Type of the left-hand side vector operand
1249  , typename MT1 > // Type of the right-hand side matrix operand
1250  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1251  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1252  {
1253  selectDefaultSubAssignKernel( y, x, A );
1254  }
1256  //**********************************************************************************************
1257 
1258  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1259 #if BLAZE_BLAS_MODE
1260 
1273  template< typename VT1 // Type of the left-hand side target vector
1274  , typename VT2 // Type of the left-hand side vector operand
1275  , typename MT1 > // Type of the right-hand side matrix operand
1276  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1277  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1278  {
1279  using boost::numeric_cast;
1280 
1281  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1282  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1283  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1284 
1285  const int M ( numeric_cast<int>( A.rows() ) );
1286  const int N ( numeric_cast<int>( A.columns() ) );
1287  const int lda( numeric_cast<int>( A.spacing() ) );
1288 
1289  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1290  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1291  }
1293 #endif
1294  //**********************************************************************************************
1295 
1296  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1297 #if BLAZE_BLAS_MODE
1298 
1311  template< typename VT1 // Type of the left-hand side target vector
1312  , typename VT2 // Type of the left-hand side vector operand
1313  , typename MT1 > // Type of the right-hand side matrix operand
1314  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1315  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1316  {
1317  using boost::numeric_cast;
1318 
1319  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1320  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1321  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1322 
1323  const int M ( numeric_cast<int>( A.rows() ) );
1324  const int N ( numeric_cast<int>( A.columns() ) );
1325  const int lda( numeric_cast<int>( A.spacing() ) );
1326 
1327  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1328  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1329  }
1331 #endif
1332  //**********************************************************************************************
1333 
1334  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1335 #if BLAZE_BLAS_MODE
1336 
1349  template< typename VT1 // Type of the left-hand side target vector
1350  , typename VT2 // Type of the left-hand side vector operand
1351  , typename MT1 > // Type of the right-hand side matrix operand
1352  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1353  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1354  {
1355  using boost::numeric_cast;
1356 
1357  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1358  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1359  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1360  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1361  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1362  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1363 
1364  const int M ( numeric_cast<int>( A.rows() ) );
1365  const int N ( numeric_cast<int>( A.columns() ) );
1366  const int lda( numeric_cast<int>( A.spacing() ) );
1367  const complex<float> alpha( -1.0F, 0.0F );
1368  const complex<float> beta ( 1.0F, 0.0F );
1369 
1370  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1371  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1372  }
1374 #endif
1375  //**********************************************************************************************
1376 
1377  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1378 #if BLAZE_BLAS_MODE
1379 
1392  template< typename VT1 // Type of the left-hand side target vector
1393  , typename VT2 // Type of the left-hand side vector operand
1394  , typename MT1 > // Type of the right-hand side matrix operand
1395  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1396  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1397  {
1398  using boost::numeric_cast;
1399 
1400  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1401  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1402  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1403  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1404  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1405  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1406 
1407  const int M ( numeric_cast<int>( A.rows() ) );
1408  const int N ( numeric_cast<int>( A.columns() ) );
1409  const int lda( numeric_cast<int>( A.spacing() ) );
1410  const complex<double> alpha( -1.0, 0.0 );
1411  const complex<double> beta ( 1.0, 0.0 );
1412 
1413  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1414  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1415  }
1417 #endif
1418  //**********************************************************************************************
1419 
1420  //**Subtraction assignment to sparse vectors****************************************************
1421  // No special implementation for the subtraction assignment to sparse vectors.
1422  //**********************************************************************************************
1423 
1424  //**Multiplication assignment to dense vectors**************************************************
/*!\brief Multiplication assignment of a transpose dense vector-transpose dense matrix
//        multiplication to a transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be multiplied.
// \return void
//
// The expression is first evaluated into a temporary of type ResultType; the temporary is then
// passed on to the multAssign() function for the target vector.
*/
1437  template< typename VT1 > // Type of the target dense vector
1438  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1439  {
1441 
1444  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1445 
1446  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1447 
      // Evaluate the whole product once, then multiply the target with the plain result
1448  const ResultType tmp( rhs );
1449  multAssign( ~lhs, tmp );
1450  }
1452  //**********************************************************************************************
1453 
1454  //**Multiplication assignment to sparse vectors*******************************************************
1455  // No special implementation for the multiplication assignment to sparse vectors.
1456  //**********************************************************************************************
1457 
1458  //**Compile time checks*************************************************************************
1465  //**********************************************************************************************
1466 };
1467 //*************************************************************************************************
1468 
1469 
1470 
1471 
1472 //=================================================================================================
1473 //
1474 // DVECSCALARMULTEXPR SPECIALIZATION
1475 //
1476 //=================================================================================================
1477 
1478 //*************************************************************************************************
1486 template< typename VT // Type of the left-hand side dense vector
1487  , typename MT // Type of the right-hand side dense matrix
1488  , typename ST > // Type of the side scalar value
1489 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
1490  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1491  , private Expression
1492  , private Computation
1493 {
1494  private:
1495  //**Type definitions****************************************************************************
1496  typedef TDVecTDMatMultExpr<VT,MT> VMM; //!< Type of the scaled vector/matrix multiplication expression.
1497  typedef typename VMM::ResultType RES; //!< Result type of the vector/matrix multiplication expression.
1498  typedef typename VT::ResultType VRT; //!< Result type of the left-hand side dense vector type VT.
1499  typedef typename MT::ResultType MRT; //!< Result type of the right-hand side dense matrix type MT.
1500  typedef typename VRT::ElementType VET; //!< Element type of the vector result type VRT.
1501  typedef typename MRT::ElementType MET; //!< Element type of the matrix result type MRT.
1502  typedef typename VT::CompositeType VCT; //!< Composite type of the left-hand side dense vector type VT.
1503  typedef typename MT::CompositeType MCT; //!< Composite type of the right-hand side dense matrix type MT.
1504  //**********************************************************************************************
1505 
1506  //**********************************************************************************************
// Compile-time flag that is non-zero when the matrix type MT is a computation, is not
// vectorizable, the vector and matrix element types VET and MET are identical, and that element
// type is BLAS compatible. It selects between 'const MRT' and 'MCT' for the RT typedef and takes
// part in the kernel dispatch of the assign() function.
1508  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1509  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1510  //**********************************************************************************************
1511 
1512  //**********************************************************************************************
1514 
1517  template< typename T1, typename T2, typename T3, typename T4 >
1518  struct UseSinglePrecisionKernel {
1519  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1520  IsFloat<typename T1::ElementType>::value &&
1521  IsFloat<typename T2::ElementType>::value &&
1522  IsFloat<typename T3::ElementType>::value &&
1523  !IsComplex<T4>::value };
1524  };
1525  //**********************************************************************************************
1526 
1527  //**********************************************************************************************
1529 
1532  template< typename T1, typename T2, typename T3, typename T4 >
1533  struct UseDoublePrecisionKernel {
1534  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1535  IsDouble<typename T1::ElementType>::value &&
1536  IsDouble<typename T2::ElementType>::value &&
1537  IsDouble<typename T3::ElementType>::value &&
1538  !IsComplex<T4>::value };
1539  };
1540  //**********************************************************************************************
1541 
1542  //**********************************************************************************************
1544 
1547  template< typename T1, typename T2, typename T3 >
1548  struct UseSinglePrecisionComplexKernel {
1549  typedef complex<float> Type;
1550  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1551  IsSame<typename T1::ElementType,Type>::value &&
1552  IsSame<typename T2::ElementType,Type>::value &&
1553  IsSame<typename T3::ElementType,Type>::value };
1554  };
1555  //**********************************************************************************************
1556 
1557  //**********************************************************************************************
1559 
1562  template< typename T1, typename T2, typename T3 >
1563  struct UseDoublePrecisionComplexKernel {
1564  typedef complex<double> Type;
1565  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1566  IsSame<typename T1::ElementType,Type>::value &&
1567  IsSame<typename T2::ElementType,Type>::value &&
1568  IsSame<typename T3::ElementType,Type>::value };
1569  };
1570  //**********************************************************************************************
1571 
1572  //**********************************************************************************************
1574 
1576  template< typename T1, typename T2, typename T3, typename T4 >
1577  struct UseDefaultKernel {
1578  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1579  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1580  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1581  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1582  };
1583  //**********************************************************************************************
1584 
1585  //**********************************************************************************************
1587 
1590  template< typename T1, typename T2, typename T3, typename T4 >
1591  struct UseVectorizedDefaultKernel {
1592  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1593  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1594  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1595  IsSame<typename T1::ElementType,T4>::value &&
1596  IntrinsicTrait<typename T1::ElementType>::addition &&
1597  IntrinsicTrait<typename T1::ElementType>::multiplication };
1598  };
1599  //**********************************************************************************************
1600 
1601  public:
1602  //**Type definitions****************************************************************************
1603  typedef DVecScalarMultExpr<VMM,ST,true> This; //!< Type of this DVecScalarMultExpr instance.
1604  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type obtained from MultTrait for RES and ST.
1605  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type of ResultType.
1606  typedef typename ResultType::ElementType ElementType; //!< Element type of ResultType.
1607  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type belonging to ElementType.
1608  typedef const ElementType ReturnType; //!< Return type of the subscript operator.
1609  typedef const ResultType CompositeType; //!< Composite type: the expression is represented by its evaluated result type.
1610 
      // Type of the left-hand side operand: the wrapped vector/matrix multiplication expression.
1612  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;
1613 
      // Type of the right-hand side scalar operand: ElementType if that is a numeric type,
      // otherwise the scalar type ST.
1615  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1616 
      // Type used for the evaluated vector operand: 'const VRT' if VT is a computation,
      // otherwise the composite type VCT.
1618  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
1619 
      // Type used for the evaluated matrix operand: 'const MRT' if the 'evaluate' flag is set,
      // otherwise the composite type MCT.
1621  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;
1622  //**********************************************************************************************
1623 
1624  //**Compilation flags***************************************************************************
// Compilation switch: this expression type always reports itself as not vectorizable.
1626  enum { vectorizable = 0 };
1627  //**********************************************************************************************
1628 
1629  //**Constructor*********************************************************************************
/*!\brief Constructor for the scaled vector/matrix multiplication expression.
//
// \param vector The left-hand side vector/matrix multiplication expression.
// \param scalar The right-hand side scalar of the multiplication expression.
//
// Both arguments are stored in the corresponding data members.
*/
1635  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1636  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1637  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1638  {}
1639  //**********************************************************************************************
1640 
1641  //**Subscript operator**************************************************************************
/*!\brief Subscript operator for the direct access to the vector elements.
//
// \param index Access index. An internal assertion checks that it is smaller than the size of
//              the wrapped vector expression.
// \return The element of the wrapped vector expression at \a index multiplied by the scalar.
*/
1647  inline ReturnType operator[]( size_t index ) const {
1648  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1649  return vector_[index] * scalar_;
1650  }
1651  //**********************************************************************************************
1652 
1653  //**Size function*******************************************************************************
/*!\brief Returns the current size/dimension of the vector.
//
// \return The size reported by the wrapped vector expression.
*/
1658  inline size_t size() const {
1659  return vector_.size();
1660  }
1661  //**********************************************************************************************
1662 
1663  //**Left operand access*************************************************************************
/*!\brief Returns the left-hand side operand of the scaling expression.
//
// \return The wrapped vector/matrix multiplication expression.
*/
1668  inline LeftOperand leftOperand() const {
1669  return vector_;
1670  }
1671  //**********************************************************************************************
1672 
1673  //**Right operand access************************************************************************
/*!\brief Returns the right-hand side operand of the scaling expression.
//
// \return The stored scalar.
*/
1678  inline RightOperand rightOperand() const {
1679  return scalar_;
1680  }
1681  //**********************************************************************************************
1682 
1683  //**********************************************************************************************
/*!\brief Returns whether the expression can alias with the given address \a alias.
//
// \param alias The alias to be checked.
// \return The answer of the wrapped vector expression's canAlias() for \a alias.
*/
1689  template< typename T >
1690  inline bool canAlias( const T* alias ) const {
1691  return vector_.canAlias( alias );
1692  }
1693  //**********************************************************************************************
1694 
1695  //**********************************************************************************************
/*!\brief Returns whether the expression is aliased with the given address \a alias.
//
// \param alias The alias to be checked.
// \return The answer of the wrapped vector expression's isAliased() for \a alias.
*/
1701  template< typename T >
1702  inline bool isAliased( const T* alias ) const {
1703  return vector_.isAliased( alias );
1704  }
1705  //**********************************************************************************************
1706 
1707  private:
1708  //**Member variables****************************************************************************
1709  LeftOperand vector_; //!< Left-hand side vector/matrix multiplication expression.
1710  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
1711  //**********************************************************************************************
1712 
1713  //**Assignment to dense vectors*****************************************************************
/*!\brief Assignment of a scaled transpose dense vector-transpose dense matrix multiplication
//        to a dense vector (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
//
// Extracts the vector and matrix operands of the wrapped multiplication expression, evaluates
// them and dispatches to either the default or the BLAS-based assignment kernel, passing the
// scalar along.
*/
1725  template< typename VT1 // Type of the target dense vector
1726  , bool TF > // Transpose flag of the target dense vector
1727  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
1728  {
1730 
1731  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1732 
1733  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1734  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1735 
      // A matrix without rows resets the target; a matrix without columns leaves the target
      // untouched.
1736  if( right.rows() == 0UL ) {
1737  reset( ~lhs );
1738  return;
1739  }
1740  else if( right.columns() == 0UL ) {
1741  return;
1742  }
1743 
1744  LT x( left ); // Evaluation of the left-hand side dense vector operand
1745  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1746 
1747  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1748  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1749  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1750  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1751 
      // The default kernel is chosen when the matrix operand is a computation that is not
      // flagged for evaluation, or when the matrix has fewer elements than
      // TDVECTDMATMULT_THRESHOLD; every other case goes to the BLAS kernel selection.
1752  if( ( IsComputation<MT>::value && !evaluate ) ||
1753  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1754  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1755  else
1756  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1757  }
1758  //**********************************************************************************************
1759 
1760  //**Default assignment to dense vectors*********************************************************
/*!\brief Default (non-vectorized) assignment kernel for the scaled multiplication
//        (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload takes part in overload resolution only if UseVectorizedDefaultKernel does not
// apply to the given types. It passes the expression \c x*A*scalar to the \c assign member of
// the target vector.
*/
1774  template< typename VT1 // Type of the left-hand side target vector
1775  , typename VT2 // Type of the left-hand side vector operand
1776  , typename MT1 // Type of the right-hand side matrix operand
1777  , typename ST2 > // Type of the scalar value
1778  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1779  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1780  {
1781  y.assign( x * A * scalar );
1782  }
1783  //**********************************************************************************************
1784 
1785  //**Vectorized default assignment to dense vectors**********************************************
1799  template< typename VT1 // Type of the left-hand side target vector
1800  , typename VT2 // Type of the left-hand side vector operand
1801  , typename MT1 // Type of the right-hand side matrix operand
1802  , typename ST2 > // Type of the scalar value
1803  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1804  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1805  {
1806  typedef IntrinsicTrait<ElementType> IT;
1807 
1808  const size_t M( A.rows() );
1809  const size_t N( A.columns() );
1810 
1811  size_t j( 0UL );
1812 
1813  for( ; (j+8UL) <= N; j+=8UL ) {
1814  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1815  for( size_t i=0UL; i<M; i+=IT::size ) {
1816  const IntrinsicType x1( x.get(i) );
1817  xmm1 = xmm1 + x1 * A.get(i,j );
1818  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1819  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1820  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1821  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1822  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1823  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1824  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1825  }
1826  y[j ] = sum( xmm1 ) * scalar;
1827  y[j+1UL] = sum( xmm2 ) * scalar;
1828  y[j+2UL] = sum( xmm3 ) * scalar;
1829  y[j+3UL] = sum( xmm4 ) * scalar;
1830  y[j+4UL] = sum( xmm5 ) * scalar;
1831  y[j+5UL] = sum( xmm6 ) * scalar;
1832  y[j+6UL] = sum( xmm7 ) * scalar;
1833  y[j+7UL] = sum( xmm8 ) * scalar;
1834  }
1835  for( ; (j+4UL) <= N; j+=4UL ) {
1836  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1837  for( size_t i=0UL; i<M; i+=IT::size ) {
1838  const IntrinsicType x1( x.get(i) );
1839  xmm1 = xmm1 + x1 * A.get(i,j );
1840  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1841  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1842  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1843  }
1844  y[j ] = sum( xmm1 ) * scalar;
1845  y[j+1UL] = sum( xmm2 ) * scalar;
1846  y[j+2UL] = sum( xmm3 ) * scalar;
1847  y[j+3UL] = sum( xmm4 ) * scalar;
1848  }
1849  for( ; (j+3UL) <= N; j+=3UL ) {
1850  IntrinsicType xmm1, xmm2, xmm3;
1851  for( size_t i=0UL; i<M; i+=IT::size ) {
1852  const IntrinsicType x1( x.get(i) );
1853  xmm1 = xmm1 + x1 * A.get(i,j );
1854  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1855  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1856  }
1857  y[j ] = sum( xmm1 ) * scalar;
1858  y[j+1UL] = sum( xmm2 ) * scalar;
1859  y[j+2UL] = sum( xmm3 ) * scalar;
1860  }
1861  for( ; (j+2UL) <= N; j+=2UL ) {
1862  IntrinsicType xmm1, xmm2;
1863  for( size_t i=0UL; i<M; i+=IT::size ) {
1864  const IntrinsicType x1( x.get(i) );
1865  xmm1 = xmm1 + x1 * A.get(i,j );
1866  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1867  }
1868  y[j ] = sum( xmm1 ) * scalar;
1869  y[j+1UL] = sum( xmm2 ) * scalar;
1870  }
1871  if( j < N ) {
1872  IntrinsicType xmm1;
1873  for( size_t i=0UL; i<M; i+=IT::size ) {
1874  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1875  }
1876  y[j] = sum( xmm1 ) * scalar;
1877  }
1878  }
1879  //**********************************************************************************************
1880 
1881  //**BLAS-based assignment to dense vectors (default)********************************************
/*!\brief Fallback of the BLAS-based assignment kernel selection for the scaled multiplication.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is enabled via UseDefaultKernel, i.e. when BLAZE_BLAS_MODE is disabled or none
// of the BLAS kernel switches applies. It does nothing but forward the arguments to
// selectDefaultAssignKernel().
*/
1894  template< typename VT1 // Type of the left-hand side target vector
1895  , typename VT2 // Type of the left-hand side vector operand
1896  , typename MT1 // Type of the right-hand side matrix operand
1897  , typename ST2 > // Type of the scalar value
1898  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1899  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1900  {
1901  selectDefaultAssignKernel( y, x, A, scalar );
1902  }
1903  //**********************************************************************************************
1904 
1905  //**BLAS-based assignment to dense vectors (single precision)***********************************
1906 #if BLAZE_BLAS_MODE
1907 
1920  template< typename VT1 // Type of the left-hand side target vector
1921  , typename VT2 // Type of the left-hand side vector operand
1922  , typename MT1 // Type of the right-hand side matrix operand
1923  , typename ST2 > // Type of the scalar value
1924  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1925  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1926  {
1927  using boost::numeric_cast;
1928 
1929  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1930  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1931  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1932 
1933  const int M ( numeric_cast<int>( A.rows() ) );
1934  const int N ( numeric_cast<int>( A.columns() ) );
1935  const int lda( numeric_cast<int>( A.spacing() ) );
1936 
1937  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
1938  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1939  }
1940 #endif
1941  //**********************************************************************************************
1942 
1943  //**BLAS-based assignment to dense vectors (double precision)***********************************
1944 #if BLAZE_BLAS_MODE
1945 
1958  template< typename VT1 // Type of the left-hand side target vector
1959  , typename VT2 // Type of the left-hand side vector operand
1960  , typename MT1 // Type of the right-hand side matrix operand
1961  , typename ST2 > // Type of the scalar value
1962  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1963  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1964  {
1965  using boost::numeric_cast;
1966 
1967  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1968  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1969  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1970 
1971  const int M ( numeric_cast<int>( A.rows() ) );
1972  const int N ( numeric_cast<int>( A.columns() ) );
1973  const int lda( numeric_cast<int>( A.spacing() ) );
1974 
1975  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
1976  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
1977  }
1978 #endif
1979  //**********************************************************************************************
1980 
1981  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1982 #if BLAZE_BLAS_MODE
1983 
/*!\brief BLAS-based assignment of the scaled multiplication for single precision complex
//        operands (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Hands the operation to \c cblas_cgemv, treating \a A as a column-major matrix in transposed
// mode. The product is scaled by \a scalar (converted to \c complex<float>) and the previous
// content of \a y by zero.
*/
1997  template< typename VT1 // Type of the left-hand side target vector
1998  , typename VT2 // Type of the left-hand side vector operand
1999  , typename MT1 // Type of the right-hand side matrix operand
2000  , typename ST2 > // Type of the scalar value
2001  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2002  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2003  {
2004  using boost::numeric_cast;
2005 
2006  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2007  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2008  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2010  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2011  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2012  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2013 
      // CBLAS expects int dimensions; the checked cast guards the narrowing from the size type
2014  const int M ( numeric_cast<int>( A.rows() ) );
2015  const int N ( numeric_cast<int>( A.columns() ) );
2016  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take their scaling factors by address
2017  const complex<float> alpha( scalar );
2018  const complex<float> beta ( 0.0F, 0.0F );
2019 
2020  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2021  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2022  }
2023 #endif
2024  //**********************************************************************************************
2025 
2026  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2027 #if BLAZE_BLAS_MODE
2028 
// BLAS kernel for the plain assignment of the scaled vector/matrix product; this overload is
// enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds and is only compiled under
// #if BLAZE_BLAS_MODE.
//
// y      - target vector, written through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor, converted to complex<double> and passed as alpha.
//
// Forwards to cblas_zgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
// NOTE(review): listing line 2054 is absent from this extract; confirm against the upstream
// header whether a statement was dropped there.
2042  template< typename VT1 // Type of the left-hand side target vector
2043  , typename VT2 // Type of the left-hand side vector operand
2044  , typename MT1 // Type of the right-hand side matrix operand
2045  , typename ST2 > // Type of the scalar value
2046  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2047  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2048  {
2049  using boost::numeric_cast;
2050 
2051  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2052  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2053  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2055  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2056  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2057  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2058 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2059  const int M ( numeric_cast<int>( A.rows() ) );
2060  const int N ( numeric_cast<int>( A.columns() ) );
2061  const int lda( numeric_cast<int>( A.spacing() ) );
2062  const complex<double> alpha( scalar );
2063  const complex<double> beta ( 0.0, 0.0 ); // beta == 0: previous contents of y do not contribute
2064 
2065  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2066  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2067  }
2068 #endif
2069  //**********************************************************************************************
2070 
2071  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector/matrix product to a sparse vector.
//
// lhs - target sparse vector.
// rhs - the scaled multiplication expression to be assigned.
//
// The expression is first evaluated into a temporary of type ResultType, and that temporary is
// then assigned to the target via assign( ~lhs, tmp ).
// NOTE(review): listing lines 2087 and 2089-2090 are absent from this extract; confirm against
// the upstream header which statements stood there.
2083  template< typename VT1 // Type of the target sparse vector
2084  , bool TF > // Transpose flag of the target sparse vector
2085  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2086  {
2088 
2091  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2092 
2093  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2094 
2095  const ResultType tmp( rhs ); // evaluate the whole expression once
2096  assign( ~lhs, tmp );
2097  }
2098  //**********************************************************************************************
2099 
2100  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector/matrix product to a dense vector.
//
// lhs - target dense vector.
// rhs - the scaled multiplication expression to be added.
//
// Steps performed by the visible code:
//  1. Fetch the left (vector) and right (matrix) operands of rhs.vector_.
//  2. Return without touching lhs if the matrix has zero rows or zero columns.
//  3. Materialize both operands as LT / RT objects.
//  4. Dispatch to the default kernel if ( IsComputation<MT>::value && !evaluate ) or if
//     rows*columns is below TDVECTDMATMULT_THRESHOLD; otherwise dispatch to the BLAS kernel.
// LT, RT, VMM and evaluate are declared outside this excerpt.
// NOTE(review): listing line 2116 is absent from this extract; confirm against the upstream
// header which statement stood there.
2112  template< typename VT1 // Type of the target dense vector
2113  , bool TF > // Transpose flag of the target dense vector
2114  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2115  {
2117 
2118  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2119 
2120  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2121  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2122 
// An empty matrix contributes nothing to the sum, so lhs is left unchanged.
2123  if( right.rows() == 0UL || right.columns() == 0UL ) {
2124  return;
2125  }
2126 
2127  LT x( left ); // Evaluation of the left-hand side dense vector operand
2128  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2129 
2130  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2131  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2132  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2133  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2134 
// Kernel selection: small problems and non-evaluated computations use the default kernel.
2135  if( ( IsComputation<MT>::value && !evaluate ) ||
2136  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2137  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2138  else
2139  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2140  }
2141  //**********************************************************************************************
2142 
2143  //**Default addition assignment to dense vectors************************************************
// Non-vectorized default kernel for the addition assignment of the scaled product; this
// overload is the one selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does NOT hold
// (DisableIf).
//
// y      - target vector.
// x      - left-hand side vector operand.
// A      - right-hand side matrix operand.
// scalar - scaling factor.
//
// Delegates to the target's own addAssign() with the expression x * A * scalar.
2157  template< typename VT1 // Type of the left-hand side target vector
2158  , typename VT2 // Type of the left-hand side vector operand
2159  , typename MT1 // Type of the right-hand side matrix operand
2160  , typename ST2 > // Type of the scalar value
2161  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2162  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2163  {
2164  y.addAssign( x * A * scalar );
2165  }
2166  //**********************************************************************************************
2167 
2168  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment of the scaled product; this overload
// is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds (EnableIf).
//
// y      - target vector; each element y[j] is incremented.
// x      - left-hand side vector operand, read in packed form through x.get(i).
// A      - right-hand side matrix operand, read in packed form through A.get(i,j).
// scalar - scaling factor applied to every accumulated column sum.
//
// The columns of A are handled in groups of 8, then 4, 3, 2 and finally one leftover column.
// For every group the row loop advances in steps of IT::size, multiplies the packed x values
// with the packed values of each column in the group and accumulates one packed sum per
// column. After the row loop, sum() of each accumulator times scalar is added to y.
//
// NOTE(review): the accumulators are default-constructed and read immediately; this presumably
// relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the row loop has no remainder handling; presumably x and the columns of A are
// padded to a multiple of IT::size - confirm.
2182  template< typename VT1 // Type of the left-hand side target vector
2183  , typename VT2 // Type of the left-hand side vector operand
2184  , typename MT1 // Type of the right-hand side matrix operand
2185  , typename ST2 > // Type of the scalar value
2186  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2187  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2188  {
2189  typedef IntrinsicTrait<ElementType> IT;
2190 
2191  const size_t M( A.rows() );
2192  const size_t N( A.columns() );
2193 
2194  size_t j( 0UL ); // current column; shared by all of the following loops
2195 
// Groups of eight columns.
2196  for( ; (j+8UL) <= N; j+=8UL ) {
2197  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2198  for( size_t i=0UL; i<M; i+=IT::size ) {
2199  const IntrinsicType x1( x.get(i) ); // packed x values are loaded once per row step
2200  xmm1 = xmm1 + x1 * A.get(i,j );
2201  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2202  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2203  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2204  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2205  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2206  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2207  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2208  }
2209  y[j ] += sum( xmm1 ) * scalar;
2210  y[j+1UL] += sum( xmm2 ) * scalar;
2211  y[j+2UL] += sum( xmm3 ) * scalar;
2212  y[j+3UL] += sum( xmm4 ) * scalar;
2213  y[j+4UL] += sum( xmm5 ) * scalar;
2214  y[j+5UL] += sum( xmm6 ) * scalar;
2215  y[j+6UL] += sum( xmm7 ) * scalar;
2216  y[j+7UL] += sum( xmm8 ) * scalar;
2217  }
// Groups of four columns.
2218  for( ; (j+4UL) <= N; j+=4UL ) {
2219  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2220  for( size_t i=0UL; i<M; i+=IT::size ) {
2221  const IntrinsicType x1( x.get(i) );
2222  xmm1 = xmm1 + x1 * A.get(i,j );
2223  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2224  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2225  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2226  }
2227  y[j ] += sum( xmm1 ) * scalar;
2228  y[j+1UL] += sum( xmm2 ) * scalar;
2229  y[j+2UL] += sum( xmm3 ) * scalar;
2230  y[j+3UL] += sum( xmm4 ) * scalar;
2231  }
// Groups of three columns.
2232  for( ; (j+3UL) <= N; j+=3UL ) {
2233  IntrinsicType xmm1, xmm2, xmm3;
2234  for( size_t i=0UL; i<M; i+=IT::size ) {
2235  const IntrinsicType x1( x.get(i) );
2236  xmm1 = xmm1 + x1 * A.get(i,j );
2237  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2238  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2239  }
2240  y[j ] += sum( xmm1 ) * scalar;
2241  y[j+1UL] += sum( xmm2 ) * scalar;
2242  y[j+2UL] += sum( xmm3 ) * scalar;
2243  }
// Groups of two columns.
2244  for( ; (j+2UL) <= N; j+=2UL ) {
2245  IntrinsicType xmm1, xmm2;
2246  for( size_t i=0UL; i<M; i+=IT::size ) {
2247  const IntrinsicType x1( x.get(i) );
2248  xmm1 = xmm1 + x1 * A.get(i,j );
2249  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2250  }
2251  y[j ] += sum( xmm1 ) * scalar;
2252  y[j+1UL] += sum( xmm2 ) * scalar;
2253  }
// At most one column is left at this point.
2254  if( j < N ) {
2255  IntrinsicType xmm1;
2256  for( size_t i=0UL; i<M; i+=IT::size ) {
2257  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2258  }
2259  y[j] += sum( xmm1 ) * scalar;
2260  }
2261  }
2262  //**********************************************************************************************
2263 
2264  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS addition-assignment kernel, enabled when
// UseDefaultKernel<VT1,VT2,MT1,ST2> holds. It performs no BLAS call itself and simply
// forwards all four arguments to selectDefaultAddAssignKernel().
//
// y      - target vector.
// x      - left-hand side vector operand.
// A      - right-hand side matrix operand.
// scalar - scaling factor.
2278  template< typename VT1 // Type of the left-hand side target vector
2279  , typename VT2 // Type of the left-hand side vector operand
2280  , typename MT1 // Type of the right-hand side matrix operand
2281  , typename ST2 > // Type of the scalar value
2282  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2283  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2284  {
2285  selectDefaultAddAssignKernel( y, x, A, scalar );
2286  }
2287  //**********************************************************************************************
2288 
2289  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2290 #if BLAZE_BLAS_MODE
2291 
// BLAS kernel for the addition assignment of the scaled vector/matrix product; this overload
// is enabled when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds and is only compiled under
// #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor, passed as alpha.
//
// Forwards to cblas_sgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
2304  template< typename VT1 // Type of the left-hand side target vector
2305  , typename VT2 // Type of the left-hand side vector operand
2306  , typename MT1 // Type of the right-hand side matrix operand
2307  , typename ST2 > // Type of the scalar value
2308  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2309  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2310  {
2311  using boost::numeric_cast;
2312 
2313  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2314  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2315  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2316 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2317  const int M ( numeric_cast<int>( A.rows() ) );
2318  const int N ( numeric_cast<int>( A.columns() ) );
2319  const int lda( numeric_cast<int>( A.spacing() ) );
2320 
// beta == 1.0F keeps the current contents of y and adds the scaled product to them.
2321  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2322  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2323  }
2324 #endif
2325  //**********************************************************************************************
2326 
2327  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2328 #if BLAZE_BLAS_MODE
2329 
// BLAS kernel for the addition assignment of the scaled vector/matrix product; this overload
// is enabled when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds and is only compiled under
// #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor, passed as alpha.
//
// Forwards to cblas_dgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
2342  template< typename VT1 // Type of the left-hand side target vector
2343  , typename VT2 // Type of the left-hand side vector operand
2344  , typename MT1 // Type of the right-hand side matrix operand
2345  , typename ST2 > // Type of the scalar value
2346  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2347  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2348  {
2349  using boost::numeric_cast;
2350 
2351  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2352  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2353  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2354 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2355  const int M ( numeric_cast<int>( A.rows() ) );
2356  const int N ( numeric_cast<int>( A.columns() ) );
2357  const int lda( numeric_cast<int>( A.spacing() ) );
2358 
// beta == 1.0 keeps the current contents of y and adds the scaled product to them.
2359  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2360  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2361  }
2362 #endif
2363  //**********************************************************************************************
2364 
2365  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2366 #if BLAZE_BLAS_MODE
2367 
// BLAS kernel for the addition assignment of the scaled vector/matrix product; this overload
// is enabled when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds and is only compiled
// under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor, converted to complex<float> and passed as alpha.
//
// Forwards to cblas_cgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
// NOTE(review): listing line 2393 is absent from this extract; confirm against the upstream
// header whether a statement was dropped there.
2381  template< typename VT1 // Type of the left-hand side target vector
2382  , typename VT2 // Type of the left-hand side vector operand
2383  , typename MT1 // Type of the right-hand side matrix operand
2384  , typename ST2 > // Type of the scalar value
2385  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2386  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2387  {
2388  using boost::numeric_cast;
2389 
2390  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2391  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2392  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2394  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2395  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2396  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2397 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2398  const int M ( numeric_cast<int>( A.rows() ) );
2399  const int N ( numeric_cast<int>( A.columns() ) );
2400  const int lda( numeric_cast<int>( A.spacing() ) );
2401  const complex<float> alpha( scalar );
2402  const complex<float> beta ( 1.0F, 0.0F ); // beta == 1: the product is added to the current y
2403 
2404  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2405  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2406  }
2407 #endif
2408  //**********************************************************************************************
2409 
2410  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2411 #if BLAZE_BLAS_MODE
2412 
// BLAS kernel for the addition assignment of the scaled vector/matrix product; this overload
// is enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds and is only compiled
// under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor, converted to complex<double> and passed as alpha.
//
// Forwards to cblas_zgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
// NOTE(review): listing line 2438 is absent from this extract; confirm against the upstream
// header whether a statement was dropped there.
2426  template< typename VT1 // Type of the left-hand side target vector
2427  , typename VT2 // Type of the left-hand side vector operand
2428  , typename MT1 // Type of the right-hand side matrix operand
2429  , typename ST2 > // Type of the scalar value
2430  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2431  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2432  {
2433  using boost::numeric_cast;
2434 
2435  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2436  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2437  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2439  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2440  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2441  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2442 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2443  const int M ( numeric_cast<int>( A.rows() ) );
2444  const int N ( numeric_cast<int>( A.columns() ) );
2445  const int lda( numeric_cast<int>( A.spacing() ) );
2446  const complex<double> alpha( scalar );
2447  const complex<double> beta ( 1.0, 0.0 ); // beta == 1: the product is added to the current y
2448 
2449  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2450  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2451  }
2452 #endif
2453  //**********************************************************************************************
2454 
2455  //**Addition assignment to sparse vectors*******************************************************
2456  // No special implementation for the addition assignment to sparse vectors.
2457  //**********************************************************************************************
2458 
2459  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled vector/matrix product to a dense vector.
//
// lhs - target dense vector.
// rhs - the scaled multiplication expression to be subtracted.
//
// Steps performed by the visible code:
//  1. Fetch the left (vector) and right (matrix) operands of rhs.vector_.
//  2. Return without touching lhs if the matrix has zero rows or zero columns.
//  3. Materialize both operands as LT / RT objects.
//  4. Dispatch to the default kernel if ( IsComputation<MT>::value && !evaluate ) or if
//     rows*columns is below TDVECTDMATMULT_THRESHOLD; otherwise dispatch to the BLAS kernel.
// LT, RT, VMM and evaluate are declared outside this excerpt.
// NOTE(review): listing line 2475 is absent from this extract; confirm against the upstream
// header which statement stood there.
2471  template< typename VT1 // Type of the target dense vector
2472  , bool TF > // Transpose flag of the target dense vector
2473  friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2474  {
2476 
2477  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2478 
2479  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2480  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2481 
// An empty matrix contributes nothing to the difference, so lhs is left unchanged.
2482  if( right.rows() == 0UL || right.columns() == 0UL ) {
2483  return;
2484  }
2485 
2486  LT x( left ); // Evaluation of the left-hand side dense vector operand
2487  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2488 
2489  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2490  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2491  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2492  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2493 
// Kernel selection: small problems and non-evaluated computations use the default kernel.
2494  if( ( IsComputation<MT>::value && !evaluate ) ||
2495  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2496  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2497  else
2498  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2499  }
2500  //**********************************************************************************************
2501 
2502  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel for the subtraction assignment of the scaled product; this
// overload is the one selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does NOT hold
// (DisableIf).
//
// y      - target vector.
// x      - left-hand side vector operand.
// A      - right-hand side matrix operand.
// scalar - scaling factor.
//
// Delegates to the target's own subAssign() with the expression x * A * scalar.
2516  template< typename VT1 // Type of the left-hand side target vector
2517  , typename VT2 // Type of the left-hand side vector operand
2518  , typename MT1 // Type of the right-hand side matrix operand
2519  , typename ST2 > // Type of the scalar value
2520  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2521  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2522  {
2523  y.subAssign( x * A * scalar );
2524  }
2525  //**********************************************************************************************
2526 
2527  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment of the scaled product; this
// overload is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds (EnableIf).
//
// y      - target vector; each element y[j] is decremented.
// x      - left-hand side vector operand, read in packed form through x.get(i).
// A      - right-hand side matrix operand, read in packed form through A.get(i,j).
// scalar - scaling factor applied to every accumulated column sum.
//
// The columns of A are handled in groups of 8, then 4, 3, 2 and finally one leftover column.
// For every group the row loop advances in steps of IT::size, multiplies the packed x values
// with the packed values of each column in the group and accumulates one packed sum per
// column. After the row loop, sum() of each accumulator times scalar is subtracted from y.
//
// NOTE(review): the accumulators are default-constructed and read immediately; this presumably
// relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the row loop has no remainder handling; presumably x and the columns of A are
// padded to a multiple of IT::size - confirm.
2541  template< typename VT1 // Type of the left-hand side target vector
2542  , typename VT2 // Type of the left-hand side vector operand
2543  , typename MT1 // Type of the right-hand side matrix operand
2544  , typename ST2 > // Type of the scalar value
2545  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2546  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2547  {
2548  typedef IntrinsicTrait<ElementType> IT;
2549 
2550  const size_t M( A.rows() );
2551  const size_t N( A.columns() );
2552 
2553  size_t j( 0UL ); // current column; shared by all of the following loops
2554 
// Groups of eight columns.
2555  for( ; (j+8UL) <= N; j+=8UL ) {
2556  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2557  for( size_t i=0UL; i<M; i+=IT::size ) {
2558  const IntrinsicType x1( x.get(i) ); // packed x values are loaded once per row step
2559  xmm1 = xmm1 + x1 * A.get(i,j );
2560  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2561  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2562  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2563  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2564  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2565  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2566  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2567  }
2568  y[j ] -= sum( xmm1 ) * scalar;
2569  y[j+1UL] -= sum( xmm2 ) * scalar;
2570  y[j+2UL] -= sum( xmm3 ) * scalar;
2571  y[j+3UL] -= sum( xmm4 ) * scalar;
2572  y[j+4UL] -= sum( xmm5 ) * scalar;
2573  y[j+5UL] -= sum( xmm6 ) * scalar;
2574  y[j+6UL] -= sum( xmm7 ) * scalar;
2575  y[j+7UL] -= sum( xmm8 ) * scalar;
2576  }
// Groups of four columns.
2577  for( ; (j+4UL) <= N; j+=4UL ) {
2578  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2579  for( size_t i=0UL; i<M; i+=IT::size ) {
2580  const IntrinsicType x1( x.get(i) );
2581  xmm1 = xmm1 + x1 * A.get(i,j );
2582  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2583  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2584  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2585  }
2586  y[j ] -= sum( xmm1 ) * scalar;
2587  y[j+1UL] -= sum( xmm2 ) * scalar;
2588  y[j+2UL] -= sum( xmm3 ) * scalar;
2589  y[j+3UL] -= sum( xmm4 ) * scalar;
2590  }
// Groups of three columns.
2591  for( ; (j+3UL) <= N; j+=3UL ) {
2592  IntrinsicType xmm1, xmm2, xmm3;
2593  for( size_t i=0UL; i<M; i+=IT::size ) {
2594  const IntrinsicType x1( x.get(i) );
2595  xmm1 = xmm1 + x1 * A.get(i,j );
2596  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2597  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2598  }
2599  y[j ] -= sum( xmm1 ) * scalar;
2600  y[j+1UL] -= sum( xmm2 ) * scalar;
2601  y[j+2UL] -= sum( xmm3 ) * scalar;
2602  }
// Groups of two columns.
2603  for( ; (j+2UL) <= N; j+=2UL ) {
2604  IntrinsicType xmm1, xmm2;
2605  for( size_t i=0UL; i<M; i+=IT::size ) {
2606  const IntrinsicType x1( x.get(i) );
2607  xmm1 = xmm1 + x1 * A.get(i,j );
2608  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2609  }
2610  y[j ] -= sum( xmm1 ) * scalar;
2611  y[j+1UL] -= sum( xmm2 ) * scalar;
2612  }
// At most one column is left at this point.
2613  if( j < N ) {
2614  IntrinsicType xmm1;
2615  for( size_t i=0UL; i<M; i+=IT::size ) {
2616  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2617  }
2618  y[j] -= sum( xmm1 ) * scalar;
2619  }
2620  }
2621  //**********************************************************************************************
2622 
2623  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback overload of the BLAS subtraction-assignment kernel, enabled when
// UseDefaultKernel<VT1,VT2,MT1,ST2> holds. It performs no BLAS call itself and simply
// forwards all four arguments to selectDefaultSubAssignKernel().
//
// y      - target vector.
// x      - left-hand side vector operand.
// A      - right-hand side matrix operand.
// scalar - scaling factor.
2638  template< typename VT1 // Type of the left-hand side target vector
2639  , typename VT2 // Type of the left-hand side vector operand
2640  , typename MT1 // Type of the right-hand side matrix operand
2641  , typename ST2 > // Type of the scalar value
2642  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2643  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2644  {
2645  selectDefaultSubAssignKernel( y, x, A, scalar );
2646  }
2647  //**********************************************************************************************
2648 
2649  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2650 #if BLAZE_BLAS_MODE
2651 
// BLAS kernel for the subtraction assignment of the scaled vector/matrix product; this
// overload is enabled when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds and is only
// compiled under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor; its negation is passed as alpha.
//
// Forwards to cblas_sgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
2664  template< typename VT1 // Type of the left-hand side target vector
2665  , typename VT2 // Type of the left-hand side vector operand
2666  , typename MT1 // Type of the right-hand side matrix operand
2667  , typename ST2 > // Type of the scalar value
2668  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2669  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2670  {
2671  using boost::numeric_cast;
2672 
2673  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2674  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2675  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2676 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2677  const int M ( numeric_cast<int>( A.rows() ) );
2678  const int N ( numeric_cast<int>( A.columns() ) );
2679  const int lda( numeric_cast<int>( A.spacing() ) );
2680 
// Subtraction is expressed as an addition with alpha == -scalar and beta == 1.0F.
2681  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2682  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2683  }
2684 #endif
2685  //**********************************************************************************************
2686 
2687  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2688 #if BLAZE_BLAS_MODE
2689 
// BLAS kernel for the subtraction assignment of the scaled vector/matrix product; this
// overload is enabled when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds and is only
// compiled under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor; its negation is passed as alpha.
//
// Forwards to cblas_dgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
2702  template< typename VT1 // Type of the left-hand side target vector
2703  , typename VT2 // Type of the left-hand side vector operand
2704  , typename MT1 // Type of the right-hand side matrix operand
2705  , typename ST2 > // Type of the scalar value
2706  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2707  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2708  {
2709  using boost::numeric_cast;
2710 
2711  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2712  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2713  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2714 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2715  const int M ( numeric_cast<int>( A.rows() ) );
2716  const int N ( numeric_cast<int>( A.columns() ) );
2717  const int lda( numeric_cast<int>( A.spacing() ) );
2718 
// Subtraction is expressed as an addition with alpha == -scalar and beta == 1.0.
2719  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2720  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2721  }
2722 #endif
2723  //**********************************************************************************************
2724 
2725  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2726 #if BLAZE_BLAS_MODE
2727 
// BLAS kernel for the subtraction assignment of the scaled vector/matrix product; this
// overload is enabled when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds and is only
// compiled under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor; its negation is converted to complex<float> and passed as alpha.
//
// Forwards to cblas_cgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
// NOTE(review): listing line 2754 is absent from this extract; confirm against the upstream
// header whether a statement was dropped there.
2742  template< typename VT1 // Type of the left-hand side target vector
2743  , typename VT2 // Type of the left-hand side vector operand
2744  , typename MT1 // Type of the right-hand side matrix operand
2745  , typename ST2 > // Type of the scalar value
2746  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2747  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2748  {
2749  using boost::numeric_cast;
2750 
2751  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2752  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2753  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2755  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2756  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2757  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2758 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2759  const int M ( numeric_cast<int>( A.rows() ) );
2760  const int N ( numeric_cast<int>( A.columns() ) );
2761  const int lda( numeric_cast<int>( A.spacing() ) );
2762  const complex<float> alpha( -scalar ); // negated so that the product is subtracted
2763  const complex<float> beta ( 1.0F, 0.0F ); // beta == 1: the current y is kept
2764 
2765  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2766  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2767  }
2768 #endif
2769  //**********************************************************************************************
2770 
2771  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2772 #if BLAZE_BLAS_MODE
2773 
// BLAS kernel for the subtraction assignment of the scaled vector/matrix product; this
// overload is enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds and is only
// compiled under #if BLAZE_BLAS_MODE.
//
// y      - target vector, updated in place through y.data().
// x      - left-hand side vector operand, read through x.data().
// A      - right-hand side matrix operand, passed to BLAS as column-major storage.
// scalar - scaling factor; its negation is converted to complex<double> and passed as alpha.
//
// Forwards to cblas_zgemv( CblasColMajor, CblasTrans, ... ) with unit strides for x and y.
// NOTE(review): listing line 2800 is absent from this extract; confirm against the upstream
// header whether a statement was dropped there.
2788  template< typename VT1 // Type of the left-hand side target vector
2789  , typename VT2 // Type of the left-hand side vector operand
2790  , typename MT1 // Type of the right-hand side matrix operand
2791  , typename ST2 > // Type of the scalar value
2792  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2793  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2794  {
2795  using boost::numeric_cast;
2796 
2797  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2798  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2801  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2802  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2803  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2804 
// The CBLAS interface takes int dimensions, so the size values are narrowed via numeric_cast.
2805  const int M ( numeric_cast<int>( A.rows() ) );
2806  const int N ( numeric_cast<int>( A.columns() ) );
2807  const int lda( numeric_cast<int>( A.spacing() ) );
2808  const complex<double> alpha( -scalar ); // negated so that the product is subtracted
2809  const complex<double> beta ( 1.0, 0.0 ); // beta == 1: the current y is kept
2810 
2811  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2812  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2813  }
2814 #endif
2815  //**********************************************************************************************
2816 
2817  //**Subtraction assignment to sparse vectors****************************************************
2818  // No special implementation for the subtraction assignment to sparse vectors.
2819  //**********************************************************************************************
2820 
2821  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled vector/matrix product to a dense vector.
//
// lhs - target dense vector.
// rhs - the scaled multiplication expression acting as right-hand side operand.
//
// The expression is first evaluated into a temporary of type ResultType, and that temporary is
// then applied to the target via multAssign( ~lhs, tmp ).
// NOTE(review): listing lines 2837 and 2839-2840 are absent from this extract; confirm against
// the upstream header which statements stood there.
2833  template< typename VT1 // Type of the target dense vector
2834  , bool TF > // Transpose flag of the target dense vector
2835  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2836  {
2838 
2841  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2842 
2843  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2844 
2845  const ResultType tmp( rhs ); // evaluate the whole expression once
2846  multAssign( ~lhs, tmp );
2847  }
2848  //**********************************************************************************************
2849 
2850  //**Multiplication assignment to sparse vectors*******************************************************
2851  // No special implementation for the multiplication assignment to sparse vectors.
2852  //**********************************************************************************************
2853 
2854  //**Compile time checks*************************************************************************
2862  //**********************************************************************************************
2863 };
2865 //*************************************************************************************************
2866 
2867 
2868 
2869 
2870 //=================================================================================================
2871 //
2872 // GLOBAL BINARY ARITHMETIC OPERATORS
2873 //
2874 //=================================================================================================
2875 
2876 //*************************************************************************************************
// Global factory function that wraps a dense vector and a dense matrix into a
// TDVecTDMatMultExpr<T1,T2> expression object.
//
// Returns the expression object built from ~vec and ~mat.
// Throws std::invalid_argument if the size of the vector differs from the number of rows of
// the matrix.
// The overload is removed via DisableIf when IsMatMatMultExpr<T2> holds.
//
// NOTE(review): the declarator line (listing line 2910, between the return type and the opening
// brace) is missing from this extract, so the function name and the declarations of the
// parameters `vec` and `mat` are not visible here - presumably this is the multiplication
// operator; restore the line from the upstream header. Listing line 2912 is absent as well.
2907 template< typename T1 // Type of the left-hand side dense vector
2908  , typename T2 > // Type of the right-hand side dense matrix
2909 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
2911 {
2913 
// The vector length must match the row count of the matrix for the product to be defined.
2914  if( (~vec).size() != (~mat).rows() )
2915  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2916 
2917  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
2918 }
2919 //*************************************************************************************************
2920 
2921 } // namespace blaze
2922 
2923 #endif