All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
41 #include <blaze/math/Intrinsics.h>
42 #include <blaze/math/shims/Reset.h>
49 #include <blaze/system/BLAS.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/Complex.h>
58 #include <blaze/util/DisableIf.h>
59 #include <blaze/util/EnableIf.h>
61 #include <blaze/util/SelectType.h>
62 #include <blaze/util/Types.h>
68 
69 
70 namespace blaze {
71 
72 //=================================================================================================
73 //
74 // CLASS TDVECTDMATMULTEXPR
75 //
76 //=================================================================================================
77 
78 //*************************************************************************************************
85 template< typename VT // Type of the left-hand side dense vector
86  , typename MT > // Type of the right-hand side dense matrix
87 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
88  , private TVecMatMultExpr
89  , private Computation
90 {
91  private:
92  //**Type definitions****************************************************************************
// Private shorthand typedefs for nested types of the two operand types VT (vector) and MT (matrix).
93  typedef typename VT::ResultType VRT; // Result type of the vector operand
94  typedef typename MT::ResultType MRT; // Result type of the matrix operand
95  typedef typename VRT::ElementType VET; // Element type of the vector result type
96  typedef typename MRT::ElementType MET; // Element type of the matrix result type
97  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
98  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
99  //**********************************************************************************************
100 
101  //**********************************************************************************************
// Compile-time switch 'evaluate'. It is read by assign(), addAssign() and subAssign() below
// when they decide between the default kernel and the BLAS kernel.
// NOTE(review): in this listing the expression ends with a dangling '&&' and the enum is never
// closed (embedded source line 104 is absent). The remainder must be restored from the original
// header; it cannot be derived from the visible code.
103  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
105  //**********************************************************************************************
106 
107  //**********************************************************************************************
109 
110 
// Compile-time check whether the single precision BLAS kernel (cblas_sgemv) can be selected for
// the three types T1 (target vector), T2 (vector operand) and T3 (matrix operand).
// 'value' is nonzero only if all three types are vectorizable and all three element types are
// single precision floating point.
// NOTE(review): embedded source lines 116-118 were missing from this listing, leaving a dangling
// '&&'. They are reconstructed by analogy with UseDoublePrecisionKernel and with the
// BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE checks in the single precision BLAS kernels; confirm that
// 'IsFloat' is the trait used by the original header.
113  template< typename T1, typename T2, typename T3 >
114  struct UseSinglePrecisionKernel {
115  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
116  IsFloat<typename T1::ElementType>::value &&
117  IsFloat<typename T2::ElementType>::value &&
118  IsFloat<typename T3::ElementType>::value };
119  };
121  //**********************************************************************************************
122 
123  //**********************************************************************************************
125 
126 
// Compile-time check whether the double precision BLAS kernel can be selected for the three
// types T1, T2 and T3. 'value' is nonzero only if all three types are vectorizable and
// IsDouble holds for each of their element types. Used as the EnableIf condition of the
// cblas_dgemv based kernels below.
129  template< typename T1, typename T2, typename T3 >
130  struct UseDoublePrecisionKernel {
131  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
132  IsDouble<typename T1::ElementType>::value &&
133  IsDouble<typename T2::ElementType>::value &&
134  IsDouble<typename T3::ElementType>::value };
135  };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
142 
// Compile-time check whether the single precision complex BLAS kernel can be selected.
// 'value' is nonzero only if T1, T2 and T3 are all vectorizable and each element type is
// exactly complex<float>. Used as the EnableIf condition of the cblas_cgemv based kernels below.
145  template< typename T1, typename T2, typename T3 >
146  struct UseSinglePrecisionComplexKernel {
147  typedef complex<float> Type; // Element type required for all three operands
148  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
149  IsSame<typename T1::ElementType,Type>::value &&
150  IsSame<typename T2::ElementType,Type>::value &&
151  IsSame<typename T3::ElementType,Type>::value };
152  };
154  //**********************************************************************************************
155 
156  //**********************************************************************************************
158 
159 
// Compile-time check whether the double precision complex BLAS kernel can be selected.
// 'value' is nonzero only if T1, T2 and T3 are all vectorizable and each element type is
// exactly complex<double>. Used as the EnableIf condition of the cblas_zgemv based kernels below.
162  template< typename T1, typename T2, typename T3 >
163  struct UseDoublePrecisionComplexKernel {
164  typedef complex<double> Type; // Element type required for all three operands
165  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
166  IsSame<typename T1::ElementType,Type>::value &&
167  IsSame<typename T2::ElementType,Type>::value &&
168  IsSame<typename T3::ElementType,Type>::value };
169  };
171  //**********************************************************************************************
172 
173  //**********************************************************************************************
175 
176 
// Compile-time check whether the "BLAS" dispatch has to fall back to the default kernel.
// 'value' is nonzero if BLAZE_BLAS_MODE is zero, or if none of the four BLAS kernel checks
// (single, double, single complex, double complex) holds for T1, T2 and T3.
178  template< typename T1, typename T2, typename T3 >
179  struct UseDefaultKernel {
180  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
181  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
182  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
183  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
184  };
186  //**********************************************************************************************
187 
188  //**********************************************************************************************
190 
191 
// Compile-time check whether the vectorized (intrinsics based) default kernel can be selected.
// 'value' is nonzero only if T1, T2 and T3 are all vectorizable, all three share the same
// element type, and IntrinsicTrait of that element type reports both 'addition' and
// 'multiplication'.
194  template< typename T1, typename T2, typename T3 >
195  struct UseVectorizedDefaultKernel {
196  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
197  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
198  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
199  IntrinsicTrait<typename T1::ElementType>::addition &&
200  IntrinsicTrait<typename T1::ElementType>::multiplication };
201  };
203  //**********************************************************************************************
204 
205  public:
206  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): 'ResultType' (used here), 'IntrinsicType' (used by the vectorized kernels) and
// 'RT' (used by assign/addAssign/subAssign) are referenced in this class, but their declarations
// are not present in this listing (embedded source lines 207-208, 211 and 224-225 are absent).
209  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type
210  typedef typename ResultType::ElementType ElementType; // Element type of the result type
212  typedef const ElementType ReturnType; // Type returned by the subscript operator (by value)
213  typedef const ResultType CompositeType; // Type used when this expression is a composite operand
214 
// Storage type of the left-hand side vector operand, selected on IsExpression<VT>:
// either 'const VT' or 'const VT&' (presumably by value when VT is an expression -- confirm
// against SelectType).
216  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
217 
// Storage type of the right-hand side matrix operand, selected on IsExpression<MT> in the same way.
219  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
220 
// Type used by the assignment functions to hold the vector operand, selected on
// IsComputation<VT>: either 'const VRT' or the composite type VCT.
222  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
223 
226  //**********************************************************************************************
227 
228  //**Compilation flags***************************************************************************
// Compilation flag: the expression itself is marked as not vectorizable (value 0).
230  enum { vectorizable = 0 };
231  //**********************************************************************************************
232 
233  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//   vec: left-hand side dense vector operand
//   mat: right-hand side dense matrix operand
// An internal assertion checks that the vector size equals the number of matrix rows.
239  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
240  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
241  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
// end_ = ((rows-1) with the lowest bit cleared) + 1, i.e. the largest odd value <= rows for
// rows >= 1 (1->1, 2->1, 3->3, 4->3). For rows == 0 the unsigned subtraction wraps around;
// operator[] does not read end_ in that case.
242  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
243  {
244  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
245  }
246  //**********************************************************************************************
247 
248  //**Subscript operator**************************************************************************
// Computes a single element of the result vector: the sum over all rows j of
// vec_[j] * mat_(j,index).
//   index: column of the matrix / position in the result; asserted to be < mat_.columns()
// Returns the computed element by value. If the matrix has no rows, the element is reset().
254  inline ReturnType operator[]( size_t index ) const {
255  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
256 
257  ElementType res;
258 
259  if( mat_.rows() != 0UL ) {
// The first product initializes the result, so no separate zero-initialization is needed.
260  res = vec_[0UL] * mat_(0UL,index);
// Two-way unrolled accumulation over rows 1 .. end_-1 (end_ is odd, so pairs fit exactly).
261  for( size_t j=1UL; j<end_; j+=2UL ) {
262  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
263  }
// For an even number of rows one row (index end_) is left over after the unrolled loop.
264  if( end_ < mat_.rows() ) {
265  res += vec_[end_] * mat_(end_,index);
266  }
267  }
268  else {
269  reset( res );
270  }
271 
272  return res;
273  }
274  //**********************************************************************************************
275 
276  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of columns of the matrix operand.
281  inline size_t size() const {
282  return mat_.columns();
283  }
284  //**********************************************************************************************
285 
286  //**Left operand access*************************************************************************
// Returns the left-hand side dense vector operand of the expression.
291  inline LeftOperand leftOperand() const {
292  return vec_;
293  }
294  //**********************************************************************************************
295 
296  //**Right operand access************************************************************************
// Returns the right-hand side dense matrix operand of the expression.
301  inline RightOperand rightOperand() const {
302  return mat_;
303  }
304  //**********************************************************************************************
305 
306  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//   alias: address to check
// Returns true if either operand reports isAliased( alias ). In this listing the body is
// identical to isAliased().
312  template< typename T >
313  inline bool canAlias( const T* alias ) const {
314  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
315  }
316  //**********************************************************************************************
317 
318  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//   alias: address to check
// Returns true if the vector operand or the matrix operand reports isAliased( alias ).
324  template< typename T >
325  inline bool isAliased( const T* alias ) const {
326  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
327  }
328  //**********************************************************************************************
329 
330  private:
331  //**Member variables****************************************************************************
332  LeftOperand vec_; // Left-hand side dense vector of the multiplication expression
333  RightOperand mat_; // Right-hand side dense matrix of the multiplication expression
334  const size_t end_; // End of the unrolled calculation loop in operator[]
// NOTE(review): the declarations of vec_ and mat_ (embedded source lines 332-333) were missing
// from this listing although both members are initialized by the constructor and used
// throughout the class. They are restored with the types returned by leftOperand() and
// rightOperand(), in the order of the constructor's initializer list; confirm against the
// original header.
335  //**********************************************************************************************
336 
337  //**Assignment to dense vectors*****************************************************************
// Assigns the vector/matrix multiplication expression to a dense vector.
//   lhs: target dense vector; asserted to have the same size as the expression
//   rhs: multiplication expression to be assigned
// NOTE(review): the type 'RT' used for the matrix operand is not declared in this listing.
350  template< typename VT1 > // Type of the target dense vector
351  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
352  {
354 
355  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
356 
// A matrix without rows contributes no products: every target element is reset.
357  if( rhs.mat_.rows() == 0UL ) {
358  reset( ~lhs );
359  return;
360  }
// A matrix without columns yields an empty result: nothing to do.
361  else if( rhs.mat_.columns() == 0UL ) {
362  return;
363  }
364 
365  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
366  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
367 
368  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
369  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
370  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
371  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
372 
// Use the default kernel if the matrix is a computation that is not evaluated, or if the
// matrix has fewer elements than TDVECTDMATMULT_THRESHOLD; otherwise use the BLAS dispatch.
373  if( ( IsComputation<MT>::value && !evaluate ) ||
374  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
375  TDVecTDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
376  else
377  TDVecTDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
378  }
380  //**********************************************************************************************
381 
382  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) assignment kernel, y = x * A.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> does not hold (DisableIf).
// Forwards the expression x * A to the assign() member of the target vector.
396  template< typename VT1 // Type of the left-hand side target vector
397  , typename VT2 // Type of the left-hand side vector operand
398  , typename MT1 > // Type of the right-hand side matrix operand
399  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
400  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
401  {
402  y.assign( x * A );
403  }
405  //**********************************************************************************************
406 
407  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment kernel, y = x * A.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds (EnableIf).
// The columns of A are processed in groups of 8, then 4, 3 and 2, followed by a single
// remaining column. For every column an intrinsic accumulator collects x.get(i) * A.get(i,j)
// over the rows, and sum() reduces the accumulator into the target element y[j].
// NOTE(review): the accumulators are default-constructed and immediately used as addends;
// this relies on IntrinsicType's default constructor producing zeros -- confirm.
// NOTE(review): the row loops advance by IT::size up to M without a scalar remainder loop;
// presumably the operand storage is padded to a multiple of IT::size -- confirm.
421  template< typename VT1 // Type of the left-hand side target vector
422  , typename VT2 // Type of the left-hand side vector operand
423  , typename MT1 > // Type of the right-hand side matrix operand
424  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
425  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
426  {
427  typedef IntrinsicTrait<ElementType> IT;
428 
429  const size_t M( A.rows() );
430  const size_t N( A.columns() );
431 
432  size_t j( 0UL ); // Current column; shared by all of the following loops
433 
// Groups of eight columns: each loaded x1 is reused for eight products.
434  for( ; (j+8UL) <= N; j+=8UL ) {
435  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
436  for( size_t i=0UL; i<M; i+=IT::size ) {
437  const IntrinsicType x1( x.get(i) );
438  xmm1 = xmm1 + x1 * A.get(i,j );
439  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
440  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
441  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
442  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
443  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
444  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
445  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
446  }
447  y[j ] = sum( xmm1 );
448  y[j+1UL] = sum( xmm2 );
449  y[j+2UL] = sum( xmm3 );
450  y[j+3UL] = sum( xmm4 );
451  y[j+4UL] = sum( xmm5 );
452  y[j+5UL] = sum( xmm6 );
453  y[j+6UL] = sum( xmm7 );
454  y[j+7UL] = sum( xmm8 );
455  }
// Groups of four columns (runs at most once after the loop above).
456  for( ; (j+4UL) <= N; j+=4UL ) {
457  IntrinsicType xmm1, xmm2, xmm3, xmm4;
458  for( size_t i=0UL; i<M; i+=IT::size ) {
459  const IntrinsicType x1( x.get(i) );
460  xmm1 = xmm1 + x1 * A.get(i,j );
461  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
462  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
463  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
464  }
465  y[j ] = sum( xmm1 );
466  y[j+1UL] = sum( xmm2 );
467  y[j+2UL] = sum( xmm3 );
468  y[j+3UL] = sum( xmm4 );
469  }
// Group of three columns.
470  for( ; (j+3UL) <= N; j+=3UL ) {
471  IntrinsicType xmm1, xmm2, xmm3;
472  for( size_t i=0UL; i<M; i+=IT::size ) {
473  const IntrinsicType x1( x.get(i) );
474  xmm1 = xmm1 + x1 * A.get(i,j );
475  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
476  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
477  }
478  y[j ] = sum( xmm1 );
479  y[j+1UL] = sum( xmm2 );
480  y[j+2UL] = sum( xmm3 );
481  }
// Group of two columns.
482  for( ; (j+2UL) <= N; j+=2UL ) {
483  IntrinsicType xmm1, xmm2;
484  for( size_t i=0UL; i<M; i+=IT::size ) {
485  const IntrinsicType x1( x.get(i) );
486  xmm1 = xmm1 + x1 * A.get(i,j );
487  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
488  }
489  y[j ] = sum( xmm1 );
490  y[j+1UL] = sum( xmm2 );
491  }
// Single remaining column.
492  if( j < N ) {
493  IntrinsicType xmm1;
494  for( size_t i=0UL; i<M; i+=IT::size ) {
495  xmm1 = xmm1 + A.get(i,j) * x.get(i);
496  }
497  y[j] = sum( xmm1 );
498  }
499  }
501  //**********************************************************************************************
502 
503  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS assignment dispatch, y = x * A.
// Selected when UseDefaultKernel<VT1,VT2,MT1> holds, i.e. BLAS mode is disabled or no BLAS
// kernel matches the operand types. Forwards to selectDefaultAssignKernel().
517  template< typename VT1 // Type of the left-hand side target vector
518  , typename VT2 // Type of the left-hand side vector operand
519  , typename MT1 > // Type of the right-hand side matrix operand
520  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
521  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
522  {
523  selectDefaultAssignKernel( y, x, A );
524  }
526  //**********************************************************************************************
527 
528  //**BLAS-based assignment to dense vectors (single precision)***********************************
529 #if BLAZE_BLAS_MODE
530 
// BLAS-based assignment kernel for single precision operands, y = x * A.
// Selected when UseSinglePrecisionKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_sgemv on the column-major matrix with the transpose flag, alpha = 1 and
// beta = 0, so the previous content of y is overwritten.
543  template< typename VT1 // Type of the left-hand side target vector
544  , typename VT2 // Type of the left-hand side vector operand
545  , typename MT1 > // Type of the right-hand side matrix operand
546  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
547  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
548  {
549  using boost::numeric_cast;
550 
551  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
552  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
553  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
554 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
555  const int M ( numeric_cast<int>( A.rows() ) );
556  const int N ( numeric_cast<int>( A.columns() ) );
557  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
558 
559  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
560  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
561  }
563 #endif
564  //**********************************************************************************************
565 
566  //**BLAS-based assignment to dense vectors (double precision)***********************************
567 #if BLAZE_BLAS_MODE
568 
// BLAS-based assignment kernel for double precision operands, y = x * A.
// Selected when UseDoublePrecisionKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_dgemv on the column-major matrix with the transpose flag, alpha = 1 and
// beta = 0, so the previous content of y is overwritten.
581  template< typename VT1 // Type of the left-hand side target vector
582  , typename VT2 // Type of the left-hand side vector operand
583  , typename MT1 > // Type of the right-hand side matrix operand
584  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
585  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
586  {
587  using boost::numeric_cast;
588 
589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
590  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
591  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
592 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
593  const int M ( numeric_cast<int>( A.rows() ) );
594  const int N ( numeric_cast<int>( A.columns() ) );
595  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
596 
597  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
598  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
599  }
601 #endif
602  //**********************************************************************************************
603 
604  //**BLAS-based assignment to dense vectors (single precision complex)***************************
605 #if BLAZE_BLAS_MODE
606 
// BLAS-based assignment kernel for single precision complex operands, y = x * A.
// Selected when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_cgemv on the column-major matrix with the transpose flag, alpha = (1,0) and
// beta = (0,0), so the previous content of y is overwritten.
619  template< typename VT1 // Type of the left-hand side target vector
620  , typename VT2 // Type of the left-hand side vector operand
621  , typename MT1 > // Type of the right-hand side matrix operand
622  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
623  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
624  {
625  using boost::numeric_cast;
626 
627  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
628  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
629  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
631  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
632  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
633 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
634  const int M ( numeric_cast<int>( A.rows() ) );
635  const int N ( numeric_cast<int>( A.columns() ) );
636  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
// The complex scaling factors are handed to BLAS by address.
637  const complex<float> alpha( 1.0F, 0.0F );
638  const complex<float> beta ( 0.0F, 0.0F );
639 
640  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
641  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
642  }
644 #endif
645  //**********************************************************************************************
646 
647  //**BLAS-based assignment to dense vectors (double precision complex)***************************
648 #if BLAZE_BLAS_MODE
649 
// BLAS-based assignment kernel for double precision complex operands, y = x * A.
// Selected when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_zgemv on the column-major matrix with the transpose flag, alpha = (1,0) and
// beta = (0,0), so the previous content of y is overwritten.
662  template< typename VT1 // Type of the left-hand side target vector
663  , typename VT2 // Type of the left-hand side vector operand
664  , typename MT1 > // Type of the right-hand side matrix operand
665  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
666  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
667  {
668  using boost::numeric_cast;
669 
670  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
672  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
674  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
675  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
676 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
677  const int M ( numeric_cast<int>( A.rows() ) );
678  const int N ( numeric_cast<int>( A.columns() ) );
679  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
// The complex scaling factors are handed to BLAS by address.
680  const complex<double> alpha( 1.0, 0.0 );
681  const complex<double> beta ( 0.0, 0.0 );
682 
683  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
684  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
685  }
687 #endif
688  //**********************************************************************************************
689 
690  //**Assignment to sparse vectors****************************************************************
// Assigns the vector/matrix multiplication expression to a sparse vector.
//   lhs: target sparse vector; asserted to have the same size as the expression
//   rhs: multiplication expression to be assigned
// The expression is first evaluated into a temporary of type ResultType, which is then
// assigned to the target.
703  template< typename VT1 > // Type of the target sparse vector
704  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
705  {
707 
710  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
711 
712  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
713 
714  const ResultType tmp( rhs ); // Evaluate the whole expression once
715  assign( ~lhs, tmp );
716  }
718  //**********************************************************************************************
719 
720  //**Addition assignment to dense vectors********************************************************
// Adds the vector/matrix multiplication expression to a dense vector (lhs += rhs).
//   lhs: target dense vector; asserted to have the same size as the expression
//   rhs: multiplication expression to be added
// NOTE(review): the type 'RT' used for the matrix operand is not declared in this listing.
733  template< typename VT1 > // Type of the target dense vector
734  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
735  {
737 
738  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
739 
// With an empty matrix nothing is added, so the target stays untouched.
740  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
741  return;
742  }
743 
744  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
745  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
746 
747  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
748  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
749  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
750  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
751 
// Use the default kernel if the matrix is a computation that is not evaluated, or if the
// matrix has fewer elements than TDVECTDMATMULT_THRESHOLD; otherwise use the BLAS dispatch.
752  if( ( IsComputation<MT>::value && !evaluate ) ||
753  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
754  TDVecTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
755  else
756  TDVecTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
757  }
759  //**********************************************************************************************
760 
761  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel, y += x * A.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> does not hold (DisableIf).
// Forwards the expression x * A to the addAssign() member of the target vector.
775  template< typename VT1 // Type of the left-hand side target vector
776  , typename VT2 // Type of the left-hand side vector operand
777  , typename MT1 > // Type of the right-hand side matrix operand
778  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
779  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
780  {
781  y.addAssign( x * A );
782  }
784  //**********************************************************************************************
785 
786  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel, y += x * A.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds (EnableIf).
// Same column grouping as the vectorized assignment kernel (8, 4, 3, 2, then a single
// remaining column); the reduced sums are added to the existing elements of y.
// NOTE(review): the accumulators are default-constructed and immediately used as addends;
// this relies on IntrinsicType's default constructor producing zeros -- confirm.
// NOTE(review): the row loops advance by IT::size up to M without a scalar remainder loop;
// presumably the operand storage is padded to a multiple of IT::size -- confirm.
800  template< typename VT1 // Type of the left-hand side target vector
801  , typename VT2 // Type of the left-hand side vector operand
802  , typename MT1 > // Type of the right-hand side matrix operand
803  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
804  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
805  {
806  typedef IntrinsicTrait<ElementType> IT;
807 
808  const size_t M( A.rows() );
809  const size_t N( A.columns() );
810 
811  size_t j( 0UL ); // Current column; shared by all of the following loops
812 
// Groups of eight columns: each loaded x1 is reused for eight products.
813  for( ; (j+8UL) <= N; j+=8UL ) {
814  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
815  for( size_t i=0UL; i<M; i+=IT::size ) {
816  const IntrinsicType x1( x.get(i) );
817  xmm1 = xmm1 + x1 * A.get(i,j );
818  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
819  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
820  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
821  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
822  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
823  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
824  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
825  }
826  y[j ] += sum( xmm1 );
827  y[j+1UL] += sum( xmm2 );
828  y[j+2UL] += sum( xmm3 );
829  y[j+3UL] += sum( xmm4 );
830  y[j+4UL] += sum( xmm5 );
831  y[j+5UL] += sum( xmm6 );
832  y[j+6UL] += sum( xmm7 );
833  y[j+7UL] += sum( xmm8 );
834  }
// Groups of four columns (runs at most once after the loop above).
835  for( ; (j+4UL) <= N; j+=4UL ) {
836  IntrinsicType xmm1, xmm2, xmm3, xmm4;
837  for( size_t i=0UL; i<M; i+=IT::size ) {
838  const IntrinsicType x1( x.get(i) );
839  xmm1 = xmm1 + x1 * A.get(i,j );
840  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
841  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
842  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
843  }
844  y[j ] += sum( xmm1 );
845  y[j+1UL] += sum( xmm2 );
846  y[j+2UL] += sum( xmm3 );
847  y[j+3UL] += sum( xmm4 );
848  }
// Group of three columns.
849  for( ; (j+3UL) <= N; j+=3UL ) {
850  IntrinsicType xmm1, xmm2, xmm3;
851  for( size_t i=0UL; i<M; i+=IT::size ) {
852  const IntrinsicType x1( x.get(i) );
853  xmm1 = xmm1 + x1 * A.get(i,j );
854  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
855  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
856  }
857  y[j ] += sum( xmm1 );
858  y[j+1UL] += sum( xmm2 );
859  y[j+2UL] += sum( xmm3 );
860  }
// Group of two columns.
861  for( ; (j+2UL) <= N; j+=2UL ) {
862  IntrinsicType xmm1, xmm2;
863  for( size_t i=0UL; i<M; i+=IT::size ) {
864  const IntrinsicType x1( x.get(i) );
865  xmm1 = xmm1 + x1 * A.get(i,j );
866  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
867  }
868  y[j ] += sum( xmm1 );
869  y[j+1UL] += sum( xmm2 );
870  }
// Single remaining column.
871  if( j < N ) {
872  IntrinsicType xmm1;
873  for( size_t i=0UL; i<M; i+=IT::size ) {
874  xmm1 = xmm1 + A.get(i,j) * x.get(i);
875  }
876  y[j] += sum( xmm1 );
877  }
878  }
880  //**********************************************************************************************
881 
882  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback of the BLAS addition assignment dispatch, y += x * A.
// Selected when UseDefaultKernel<VT1,VT2,MT1> holds, i.e. BLAS mode is disabled or no BLAS
// kernel matches the operand types. Forwards to selectDefaultAddAssignKernel().
896  template< typename VT1 // Type of the left-hand side target vector
897  , typename VT2 // Type of the left-hand side vector operand
898  , typename MT1 > // Type of the right-hand side matrix operand
899  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
900  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
901  {
902  selectDefaultAddAssignKernel( y, x, A );
903  }
905  //**********************************************************************************************
906 
907  //**BLAS-based addition assignment to dense vectors (single precision)**************************
908 #if BLAZE_BLAS_MODE
909 
// BLAS-based addition assignment kernel for single precision operands, y += x * A.
// Selected when UseSinglePrecisionKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_sgemv with alpha = 1 and beta = 1, so the product is accumulated onto y.
922  template< typename VT1 // Type of the left-hand side target vector
923  , typename VT2 // Type of the left-hand side vector operand
924  , typename MT1 > // Type of the right-hand side matrix operand
925  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
926  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
927  {
928  using boost::numeric_cast;
929 
930  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
931  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
932  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
933 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
934  const int M ( numeric_cast<int>( A.rows() ) );
935  const int N ( numeric_cast<int>( A.columns() ) );
936  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
937 
938  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
939  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
940  }
942 #endif
943  //**********************************************************************************************
944 
945  //**BLAS-based addition assignment to dense vectors (double precision)**************************
946 #if BLAZE_BLAS_MODE
947 
// BLAS-based addition assignment kernel for double precision operands, y += x * A.
// Selected when UseDoublePrecisionKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_dgemv with alpha = 1 and beta = 1, so the product is accumulated onto y.
960  template< typename VT1 // Type of the left-hand side target vector
961  , typename VT2 // Type of the left-hand side vector operand
962  , typename MT1 > // Type of the right-hand side matrix operand
963  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
964  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
965  {
966  using boost::numeric_cast;
967 
968  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
969  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
970  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
971 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
972  const int M ( numeric_cast<int>( A.rows() ) );
973  const int N ( numeric_cast<int>( A.columns() ) );
974  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
975 
976  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
977  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
978  }
980 #endif
981  //**********************************************************************************************
982 
983  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
984 #if BLAZE_BLAS_MODE
985 
// BLAS-based addition assignment kernel for single precision complex operands, y += x * A.
// Selected when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_cgemv with alpha = (1,0) and beta = (1,0), so the product is accumulated onto y.
998  template< typename VT1 // Type of the left-hand side target vector
999  , typename VT2 // Type of the left-hand side vector operand
1000  , typename MT1 > // Type of the right-hand side matrix operand
1001  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1002  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1003  {
1004  using boost::numeric_cast;
1005 
1006  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1007  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1008  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1009  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1010  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1011  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1012 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
1013  const int M ( numeric_cast<int>( A.rows() ) );
1014  const int N ( numeric_cast<int>( A.columns() ) );
1015  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
// The complex scaling factors are handed to BLAS by address.
1016  const complex<float> alpha( 1.0F, 0.0F );
1017  const complex<float> beta ( 1.0F, 0.0F );
1018 
1019  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1020  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1021  }
1023 #endif
1024  //**********************************************************************************************
1025 
1026  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1027 #if BLAZE_BLAS_MODE
1028 
// BLAS-based addition assignment kernel for double precision complex operands, y += x * A.
// Selected when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; only compiled in BLAS mode.
// Calls cblas_zgemv with alpha = (1,0) and beta = (1,0), so the product is accumulated onto y.
1041  template< typename VT1 // Type of the left-hand side target vector
1042  , typename VT2 // Type of the left-hand side vector operand
1043  , typename MT1 > // Type of the right-hand side matrix operand
1044  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1045  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1046  {
1047  using boost::numeric_cast;
1048 
1049  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1050  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1051  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1052  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1053  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1054  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1055 
// The CBLAS interface takes 'int' dimensions; numeric_cast converts the size_t values.
1056  const int M ( numeric_cast<int>( A.rows() ) );
1057  const int N ( numeric_cast<int>( A.columns() ) );
1058  const int lda( numeric_cast<int>( A.spacing() ) ); // Leading dimension passed to BLAS
// The complex scaling factors are handed to BLAS by address.
1059  const complex<double> alpha( 1.0, 0.0 );
1060  const complex<double> beta ( 1.0, 0.0 );
1061 
1062  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1063  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1064  }
1066 #endif
1067  //**********************************************************************************************
1068 
1069  //**Addition assignment to sparse vectors*******************************************************
1070  // No special implementation for the addition assignment to sparse vectors.
1071  //**********************************************************************************************
1072 
1073  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the vector/matrix product expression to a dense vector
// (lhs -= rhs.vec_ * rhs.mat_).
//   lhs  target dense vector
//   rhs  multiplication expression to be subtracted
// Returns without touching lhs when the matrix operand has no rows or no columns.
1086  template< typename VT1 > // Type of the target dense vector
1087  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1088  {
1090 
1091  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1092 
// An empty product contributes nothing, so there is nothing to subtract.
1093  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1094  return;
1095  }
1096 
1097  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1098  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1099 
1100  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1101  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1102  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1103  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1104 
// Kernel dispatch: the default kernel handles matrix computations that are not pre-evaluated
// and products with fewer than TDVECTDMATMULT_THRESHOLD matrix elements; everything else goes
// through the BLAS kernel selection.
1105  if( ( IsComputation<MT>::value && !evaluate ) ||
1106  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1107  TDVecTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1108  else
1109  TDVecTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1110  }
1112  //**********************************************************************************************
1113 
1114  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel for the subtraction assignment.
// Chosen (via DisableIf) when UseVectorizedDefaultKernel does not apply to the operand types.
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// Forwards the expression x * A to the subAssign() member of the target vector.
1128  template< typename VT1 // Type of the left-hand side target vector
1129  , typename VT2 // Type of the left-hand side vector operand
1130  , typename MT1 > // Type of the right-hand side matrix operand
1131  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1132  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1133  {
1134  y.subAssign( x * A );
1135  }
1137  //**********************************************************************************************
1138 
1139  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment y -= x * A.
// Chosen (via EnableIf) when UseVectorizedDefaultKernel applies to the operand types.
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// The columns of A are processed in groups of 8, 4, 3, 2 and finally a single remaining column.
// For every column one intrinsic accumulator collects the products of x and the column entries
// in steps of IT::size; sum() reduces the accumulator and the result is subtracted from y.
// NOTE(review): the accumulators are only default-constructed, so this presumably relies on
// IntrinsicType initializing itself to zero - confirm.
// NOTE(review): the inner loops advance by IT::size up to M, which presumably relies on padded
// storage of x and A when M is not a multiple of IT::size - confirm.
1153  template< typename VT1 // Type of the left-hand side target vector
1154  , typename VT2 // Type of the left-hand side vector operand
1155  , typename MT1 > // Type of the right-hand side matrix operand
1156  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1157  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1158  {
1159  typedef IntrinsicTrait<ElementType> IT;
1160 
1161  const size_t M( A.rows() );
1162  const size_t N( A.columns() );
1163 
1164  size_t j( 0UL );
1165 
// Eight columns at a time; each loaded x value is reused for all eight columns.
1166  for( ; (j+8UL) <= N; j+=8UL ) {
1167  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1168  for( size_t i=0UL; i<M; i+=IT::size ) {
1169  const IntrinsicType x1( x.get(i) );
1170  xmm1 = xmm1 + x1 * A.get(i,j );
1171  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1172  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1173  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1174  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1175  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1176  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1177  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1178  }
1179  y[j ] -= sum( xmm1 );
1180  y[j+1UL] -= sum( xmm2 );
1181  y[j+2UL] -= sum( xmm3 );
1182  y[j+3UL] -= sum( xmm4 );
1183  y[j+4UL] -= sum( xmm5 );
1184  y[j+5UL] -= sum( xmm6 );
1185  y[j+6UL] -= sum( xmm7 );
1186  y[j+7UL] -= sum( xmm8 );
1187  }
// Four columns at a time.
1188  for( ; (j+4UL) <= N; j+=4UL ) {
1189  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1190  for( size_t i=0UL; i<M; i+=IT::size ) {
1191  const IntrinsicType x1( x.get(i) );
1192  xmm1 = xmm1 + x1 * A.get(i,j );
1193  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1194  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1195  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1196  }
1197  y[j ] -= sum( xmm1 );
1198  y[j+1UL] -= sum( xmm2 );
1199  y[j+2UL] -= sum( xmm3 );
1200  y[j+3UL] -= sum( xmm4 );
1201  }
// Three columns at a time.
1202  for( ; (j+3UL) <= N; j+=3UL ) {
1203  IntrinsicType xmm1, xmm2, xmm3;
1204  for( size_t i=0UL; i<M; i+=IT::size ) {
1205  const IntrinsicType x1( x.get(i) );
1206  xmm1 = xmm1 + x1 * A.get(i,j );
1207  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1208  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1209  }
1210  y[j ] -= sum( xmm1 );
1211  y[j+1UL] -= sum( xmm2 );
1212  y[j+2UL] -= sum( xmm3 );
1213  }
// Two columns at a time.
1214  for( ; (j+2UL) <= N; j+=2UL ) {
1215  IntrinsicType xmm1, xmm2;
1216  for( size_t i=0UL; i<M; i+=IT::size ) {
1217  const IntrinsicType x1( x.get(i) );
1218  xmm1 = xmm1 + x1 * A.get(i,j );
1219  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1220  }
1221  y[j ] -= sum( xmm1 );
1222  y[j+1UL] -= sum( xmm2 );
1223  }
// At most one column is left at this point.
1224  if( j < N ) {
1225  IntrinsicType xmm1;
1226  for( size_t i=0UL; i<M; i+=IT::size ) {
1227  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1228  }
1229  y[j] -= sum( xmm1 );
1230  }
1231  }
1233  //**********************************************************************************************
1234 
1235  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback of the BLAS kernel selection for the subtraction assignment.
// Chosen (via EnableIf) when UseDefaultKernel applies; it simply forwards to
// selectDefaultSubAssignKernel() with the same arguments.
1249  template< typename VT1 // Type of the left-hand side target vector
1250  , typename VT2 // Type of the left-hand side vector operand
1251  , typename MT1 > // Type of the right-hand side matrix operand
1252  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1253  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1254  {
1255  selectDefaultSubAssignKernel( y, x, A );
1256  }
1258  //**********************************************************************************************
1259 
1260  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1261 #if BLAZE_BLAS_MODE
1262 
// BLAS kernel for the subtraction assignment y -= x * A with single precision elements.
// Takes part in overload resolution through EnableIf on UseSinglePrecisionKernel.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand, handed to BLAS as column-major storage
// The dimensions are converted to int with boost::numeric_cast before the call.
1275  template< typename VT1 // Type of the left-hand side target vector
1276  , typename VT2 // Type of the left-hand side vector operand
1277  , typename MT1 > // Type of the right-hand side matrix operand
1278  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1279  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1280  {
1281  using boost::numeric_cast;
1282 
1283  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1284  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1285  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1286 
1287  const int M ( numeric_cast<int>( A.rows() ) );
1288  const int N ( numeric_cast<int>( A.columns() ) );
1289  const int lda( numeric_cast<int>( A.spacing() ) );
1290 
// alpha = -1, beta = 1: the transposed product is subtracted from the existing content of y.
1291  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1292  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1293  }
1295 #endif
1296  //**********************************************************************************************
1297 
1298  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1299 #if BLAZE_BLAS_MODE
1300 
// BLAS kernel for the subtraction assignment y -= x * A with double precision elements.
// Takes part in overload resolution through EnableIf on UseDoublePrecisionKernel.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand, handed to BLAS as column-major storage
// The dimensions are converted to int with boost::numeric_cast before the call.
1313  template< typename VT1 // Type of the left-hand side target vector
1314  , typename VT2 // Type of the left-hand side vector operand
1315  , typename MT1 > // Type of the right-hand side matrix operand
1316  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1317  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1318  {
1319  using boost::numeric_cast;
1320 
1321  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1322  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1323  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1324 
1325  const int M ( numeric_cast<int>( A.rows() ) );
1326  const int N ( numeric_cast<int>( A.columns() ) );
1327  const int lda( numeric_cast<int>( A.spacing() ) );
1328 
// alpha = -1, beta = 1: the transposed product is subtracted from the existing content of y.
1329  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1330  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1331  }
1333 #endif
1334  //**********************************************************************************************
1335 
1336  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1337 #if BLAZE_BLAS_MODE
1338 
// BLAS kernel for the subtraction assignment y -= x * A with single precision complex elements.
// Takes part in overload resolution through EnableIf on UseSinglePrecisionComplexKernel.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand, handed to BLAS as column-major storage
// The dimensions are converted to int with boost::numeric_cast before the call.
1351  template< typename VT1 // Type of the left-hand side target vector
1352  , typename VT2 // Type of the left-hand side vector operand
1353  , typename MT1 > // Type of the right-hand side matrix operand
1354  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1355  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1356  {
1357  using boost::numeric_cast;
1358 
1359  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1360  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1361  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1362  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1363  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1364  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1365 
1366  const int M ( numeric_cast<int>( A.rows() ) );
1367  const int N ( numeric_cast<int>( A.columns() ) );
1368  const int lda( numeric_cast<int>( A.spacing() ) );
1369  const complex<float> alpha( -1.0F, 0.0F );
1370  const complex<float> beta ( 1.0F, 0.0F );
1371 
// alpha = (-1,0), beta = (1,0): the transposed product is subtracted from the content of y.
1372  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1373  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1374  }
1376 #endif
1377  //**********************************************************************************************
1378 
1379  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1380 #if BLAZE_BLAS_MODE
1381 
// BLAS kernel for the subtraction assignment y -= x * A with double precision complex elements.
// Takes part in overload resolution through EnableIf on UseDoublePrecisionComplexKernel.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand, handed to BLAS as column-major storage
// The dimensions are converted to int with boost::numeric_cast before the call.
1394  template< typename VT1 // Type of the left-hand side target vector
1395  , typename VT2 // Type of the left-hand side vector operand
1396  , typename MT1 > // Type of the right-hand side matrix operand
1397  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1398  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1399  {
1400  using boost::numeric_cast;
1401 
1402  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1403  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1404  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1405  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1406  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1407  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1408 
1409  const int M ( numeric_cast<int>( A.rows() ) );
1410  const int N ( numeric_cast<int>( A.columns() ) );
1411  const int lda( numeric_cast<int>( A.spacing() ) );
1412  const complex<double> alpha( -1.0, 0.0 );
1413  const complex<double> beta ( 1.0, 0.0 );
1414 
// alpha = (-1,0), beta = (1,0): the transposed product is subtracted from the content of y.
1415  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1416  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1417  }
1419 #endif
1420  //**********************************************************************************************
1421 
1422  //**Subtraction assignment to sparse vectors****************************************************
1423  // No special implementation for the subtraction assignment to sparse vectors.
1424  //**********************************************************************************************
1425 
1426  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the vector/matrix product expression to a dense vector.
//   lhs  target dense vector
//   rhs  multiplication expression
// There is no dedicated kernel: the expression is first evaluated into a temporary of type
// ResultType, and that temporary is then passed on to multAssign().
1439  template< typename VT1 > // Type of the target dense vector
1440  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1441  {
1443 
1446  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1447 
1448  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1449 
1450  const ResultType tmp( rhs );
1451  multAssign( ~lhs, tmp );
1452  }
1454  //**********************************************************************************************
1455 
1456  //**Multiplication assignment to sparse vectors*******************************************************
1457  // No special implementation for the multiplication assignment to sparse vectors.
1458  //**********************************************************************************************
1459 
1460  //**Compile time checks*************************************************************************
1467  //**********************************************************************************************
1468 };
1469 //*************************************************************************************************
1470 
1471 
1472 
1473 
1474 //=================================================================================================
1475 //
1476 // DVECSCALARMULTEXPR SPECIALIZATION
1477 //
1478 //=================================================================================================
1479 
1480 //*************************************************************************************************
1488 template< typename VT // Type of the left-hand side dense vector
1489  , typename MT // Type of the right-hand side dense matrix
1490  , typename ST > // Type of the side scalar value
1491 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
1492  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1493  , private VecScalarMultExpr
1494  , private Computation
1495 {
1496  private:
1497  //**Type definitions****************************************************************************
// Private shorthand types used throughout this specialization.
1498  typedef TDVecTDMatMultExpr<VT,MT> VMM; // Type of the wrapped vector/matrix multiplication expression
1499  typedef typename VMM::ResultType RES; // Result type of the multiplication expression
1500  typedef typename VT::ResultType VRT; // Result type of the vector operand
1501  typedef typename MT::ResultType MRT; // Result type of the matrix operand
1502  typedef typename VRT::ElementType VET; // Element type of the vector operand's result type
1503  typedef typename MRT::ElementType MET; // Element type of the matrix operand's result type
1504  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
1505  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
1506  //**********************************************************************************************
1507 
1508  //**********************************************************************************************
// Compile-time switch: set when the matrix operand is a non-vectorizable computation and the
// vector and matrix share one BLAS-compatible element type. It selects the RT typedef
// (const MRT when set, MCT otherwise) and is consulted by the kernel dispatch in assign().
1510  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1511  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1512  //**********************************************************************************************
1513 
1514  //**********************************************************************************************
1516 
// Selection trait for the single precision BLAS kernel. 'value' is set when the three data
// types T1, T2, T3 are vectorizable with element type float and the scalar type T4 is not
// complex.
1519  template< typename T1, typename T2, typename T3, typename T4 >
1520  struct UseSinglePrecisionKernel {
1521  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1522  IsFloat<typename T1::ElementType>::value &&
1523  IsFloat<typename T2::ElementType>::value &&
1524  IsFloat<typename T3::ElementType>::value &&
1525  !IsComplex<T4>::value };
1526  };
1527  //**********************************************************************************************
1528 
1529  //**********************************************************************************************
1531 
// Selection trait for the double precision BLAS kernel. 'value' is set when the three data
// types T1, T2, T3 are vectorizable with element type double and the scalar type T4 is not
// complex.
1534  template< typename T1, typename T2, typename T3, typename T4 >
1535  struct UseDoublePrecisionKernel {
1536  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1537  IsDouble<typename T1::ElementType>::value &&
1538  IsDouble<typename T2::ElementType>::value &&
1539  IsDouble<typename T3::ElementType>::value &&
1540  !IsComplex<T4>::value };
1541  };
1542  //**********************************************************************************************
1543 
1544  //**********************************************************************************************
1546 
// Selection trait for the single precision complex BLAS kernel. 'value' is set when the three
// data types are vectorizable and all have the element type complex<float>. The scalar type
// plays no role in this trait.
1549  template< typename T1, typename T2, typename T3 >
1550  struct UseSinglePrecisionComplexKernel {
1551  typedef complex<float> Type;
1552  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1553  IsSame<typename T1::ElementType,Type>::value &&
1554  IsSame<typename T2::ElementType,Type>::value &&
1555  IsSame<typename T3::ElementType,Type>::value };
1556  };
1557  //**********************************************************************************************
1558 
1559  //**********************************************************************************************
1561 
// Selection trait for the double precision complex BLAS kernel. 'value' is set when the three
// data types are vectorizable and all have the element type complex<double>. The scalar type
// plays no role in this trait.
1564  template< typename T1, typename T2, typename T3 >
1565  struct UseDoublePrecisionComplexKernel {
1566  typedef complex<double> Type;
1567  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1568  IsSame<typename T1::ElementType,Type>::value &&
1569  IsSame<typename T2::ElementType,Type>::value &&
1570  IsSame<typename T3::ElementType,Type>::value };
1571  };
1572  //**********************************************************************************************
1573 
1574  //**********************************************************************************************
1576 
// Selection trait for the fallback of the BLAS kernel selection. 'value' is set when
// BLAZE_BLAS_MODE is disabled or when none of the four BLAS kernel traits applies.
1578  template< typename T1, typename T2, typename T3, typename T4 >
1579  struct UseDefaultKernel {
1580  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1581  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1582  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1583  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1584  };
1585  //**********************************************************************************************
1586 
1587  //**********************************************************************************************
1589 
// Selection trait for the vectorized default kernel. 'value' is set when the three data types
// are vectorizable, their element types and the scalar type T4 are all the same type, and
// IntrinsicTrait reports both addition and multiplication for that element type.
1592  template< typename T1, typename T2, typename T3, typename T4 >
1593  struct UseVectorizedDefaultKernel {
1594  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1595  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1596  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1597  IsSame<typename T1::ElementType,T4>::value &&
1598  IntrinsicTrait<typename T1::ElementType>::addition &&
1599  IntrinsicTrait<typename T1::ElementType>::multiplication };
1600  };
1601  //**********************************************************************************************
1602 
1603  public:
1604  //**Type definitions****************************************************************************
// Public type definitions of the scaled vector/matrix multiplication expression.
1605  typedef DVecScalarMultExpr<VMM,ST,true> This; // Type of this DVecScalarMultExpr instance
1606  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type determined by MultTrait for RES and ST
1607  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type
1608  typedef typename ResultType::ElementType ElementType; // Element type of the result type
1609  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type associated with the element type
1610  typedef const ElementType ReturnType; // Return type of the subscript operator
1611  typedef const ResultType CompositeType; // Composite type: a const ResultType
1612 
// Type of the left-hand side operand: the wrapped multiplication expression.
1614  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;
1615 
// Type of the right-hand side operand: the scalar.
1617  typedef ST RightOperand;
1618 
// Type used for the vector operand inside the kernels: its result type if VT is a computation,
// its composite type otherwise.
1620  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
1621 
// Type used for the matrix operand inside the kernels: its result type if 'evaluate' is set,
// its composite type otherwise.
1623  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;
1624  //**********************************************************************************************
1625 
1626  //**Compilation flags***************************************************************************
// Compilation flag: this expression type declares itself as not vectorizable.
1628  enum { vectorizable = 0 };
1629  //**********************************************************************************************
1630 
1631  //**Constructor*********************************************************************************
// Constructor: stores the given vector/matrix multiplication expression and the scalar.
//   vector  left-hand side multiplication expression
//   scalar  right-hand side scalar
1637  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1638  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1639  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1640  {}
1641  //**********************************************************************************************
1642 
1643  //**Subscript operator**************************************************************************
// Subscript operator: returns element 'index' of the wrapped expression times the scalar.
// The index is only checked by an internal assertion and must be smaller than size().
1649  inline ReturnType operator[]( size_t index ) const {
1650  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1651  return vector_[index] * scalar_;
1652  }
1653  //**********************************************************************************************
1654 
1655  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped multiplication expression.
1660  inline size_t size() const {
1661  return vector_.size();
1662  }
1663  //**********************************************************************************************
1664 
1665  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped vector/matrix multiplication expression.
1670  inline LeftOperand leftOperand() const {
1671  return vector_;
1672  }
1673  //**********************************************************************************************
1674 
1675  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar.
1680  inline RightOperand rightOperand() const {
1681  return scalar_;
1682  }
1683  //**********************************************************************************************
1684 
1685  //**********************************************************************************************
// Alias query: delegates to canAlias() of the wrapped multiplication expression.
//   alias  address to be checked
1691  template< typename T >
1692  inline bool canAlias( const T* alias ) const {
1693  return vector_.canAlias( alias );
1694  }
1695  //**********************************************************************************************
1696 
1697  //**********************************************************************************************
// Alias query: delegates to isAliased() of the wrapped multiplication expression.
//   alias  address to be checked
1703  template< typename T >
1704  inline bool isAliased( const T* alias ) const {
1705  return vector_.isAliased( alias );
1706  }
1707  //**********************************************************************************************
1708 
1709  private:
1710  //**Member variables****************************************************************************
1711  LeftOperand vector_; // Left-hand side operand: the vector/matrix multiplication expression
1712  RightOperand scalar_; // Right-hand side operand: the scalar of the multiplication
1713  //**********************************************************************************************
1714 
1715  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled vector/matrix product to a dense vector (lhs = (x * A) * scalar).
//   lhs  target dense vector
//   rhs  scaled multiplication expression to be assigned
// If the matrix operand has no rows, lhs is reset. If it has rows but no columns, lhs is left
// untouched. Otherwise the operands are bound as LT/RT and a kernel is dispatched.
1727  template< typename VT1 // Type of the target dense vector
1728  , bool TF > // Transpose flag of the target dense vector
1729  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
1730  {
1732 
1733  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1734 
// Unpack the two operands of the wrapped vector/matrix multiplication.
1735  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1736  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1737 
1738  if( right.rows() == 0UL ) {
1739  reset( ~lhs );
1740  return;
1741  }
1742  else if( right.columns() == 0UL ) {
1743  return;
1744  }
1745 
1746  LT x( left ); // Evaluation of the left-hand side dense vector operand
1747  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1748 
1749  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1750  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1751  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1752  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1753 
// Kernel dispatch: the default kernel handles matrix computations that are not pre-evaluated
// and products with fewer than TDVECTDMATMULT_THRESHOLD matrix elements; everything else goes
// through the BLAS kernel selection.
1754  if( ( IsComputation<MT>::value && !evaluate ) ||
1755  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1756  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1757  else
1758  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1759  }
1760  //**********************************************************************************************
1761 
1762  //**Default assignment to dense vectors*********************************************************
// Non-vectorized default kernel for the assignment of a scaled vector/matrix product.
// Chosen (via DisableIf) when UseVectorizedDefaultKernel does not apply.
//   y       target vector
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor
// Forwards the expression x * A * scalar to the assign() member of the target vector.
1776  template< typename VT1 // Type of the left-hand side target vector
1777  , typename VT2 // Type of the left-hand side vector operand
1778  , typename MT1 // Type of the right-hand side matrix operand
1779  , typename ST2 > // Type of the scalar value
1780  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1781  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1782  {
1783  y.assign( x * A * scalar );
1784  }
1785  //**********************************************************************************************
1786 
1787  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the assignment y = (x * A) * scalar.
// Chosen (via EnableIf) when UseVectorizedDefaultKernel applies.
//   y       target vector, every element is overwritten
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor applied to every reduced column sum
// The columns of A are processed in groups of 8, 4, 3, 2 and finally a single remaining column.
// For every column one intrinsic accumulator collects the products of x and the column entries
// in steps of IT::size; sum() reduces the accumulator and the scaled result is stored in y.
// NOTE(review): the accumulators are only default-constructed, so this presumably relies on
// IntrinsicType initializing itself to zero - confirm.
// NOTE(review): the inner loops advance by IT::size up to M, which presumably relies on padded
// storage of x and A when M is not a multiple of IT::size - confirm.
1801  template< typename VT1 // Type of the left-hand side target vector
1802  , typename VT2 // Type of the left-hand side vector operand
1803  , typename MT1 // Type of the right-hand side matrix operand
1804  , typename ST2 > // Type of the scalar value
1805  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1806  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1807  {
1808  typedef IntrinsicTrait<ElementType> IT;
1809 
1810  const size_t M( A.rows() );
1811  const size_t N( A.columns() );
1812 
1813  size_t j( 0UL );
1814 
// Eight columns at a time; each loaded x value is reused for all eight columns.
1815  for( ; (j+8UL) <= N; j+=8UL ) {
1816  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1817  for( size_t i=0UL; i<M; i+=IT::size ) {
1818  const IntrinsicType x1( x.get(i) );
1819  xmm1 = xmm1 + x1 * A.get(i,j );
1820  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1821  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1822  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1823  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1824  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1825  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1826  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1827  }
1828  y[j ] = sum( xmm1 ) * scalar;
1829  y[j+1UL] = sum( xmm2 ) * scalar;
1830  y[j+2UL] = sum( xmm3 ) * scalar;
1831  y[j+3UL] = sum( xmm4 ) * scalar;
1832  y[j+4UL] = sum( xmm5 ) * scalar;
1833  y[j+5UL] = sum( xmm6 ) * scalar;
1834  y[j+6UL] = sum( xmm7 ) * scalar;
1835  y[j+7UL] = sum( xmm8 ) * scalar;
1836  }
// Four columns at a time.
1837  for( ; (j+4UL) <= N; j+=4UL ) {
1838  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1839  for( size_t i=0UL; i<M; i+=IT::size ) {
1840  const IntrinsicType x1( x.get(i) );
1841  xmm1 = xmm1 + x1 * A.get(i,j );
1842  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1843  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1844  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1845  }
1846  y[j ] = sum( xmm1 ) * scalar;
1847  y[j+1UL] = sum( xmm2 ) * scalar;
1848  y[j+2UL] = sum( xmm3 ) * scalar;
1849  y[j+3UL] = sum( xmm4 ) * scalar;
1850  }
// Three columns at a time.
1851  for( ; (j+3UL) <= N; j+=3UL ) {
1852  IntrinsicType xmm1, xmm2, xmm3;
1853  for( size_t i=0UL; i<M; i+=IT::size ) {
1854  const IntrinsicType x1( x.get(i) );
1855  xmm1 = xmm1 + x1 * A.get(i,j );
1856  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1857  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1858  }
1859  y[j ] = sum( xmm1 ) * scalar;
1860  y[j+1UL] = sum( xmm2 ) * scalar;
1861  y[j+2UL] = sum( xmm3 ) * scalar;
1862  }
// Two columns at a time.
1863  for( ; (j+2UL) <= N; j+=2UL ) {
1864  IntrinsicType xmm1, xmm2;
1865  for( size_t i=0UL; i<M; i+=IT::size ) {
1866  const IntrinsicType x1( x.get(i) );
1867  xmm1 = xmm1 + x1 * A.get(i,j );
1868  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1869  }
1870  y[j ] = sum( xmm1 ) * scalar;
1871  y[j+1UL] = sum( xmm2 ) * scalar;
1872  }
// At most one column is left at this point.
1873  if( j < N ) {
1874  IntrinsicType xmm1;
1875  for( size_t i=0UL; i<M; i+=IT::size ) {
1876  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1877  }
1878  y[j] = sum( xmm1 ) * scalar;
1879  }
1880  }
1881  //**********************************************************************************************
1882 
1883  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS kernel selection for the assignment of a scaled product.
// Chosen (via EnableIf) when UseDefaultKernel applies; it simply forwards to
// selectDefaultAssignKernel() with the same arguments.
1896  template< typename VT1 // Type of the left-hand side target vector
1897  , typename VT2 // Type of the left-hand side vector operand
1898  , typename MT1 // Type of the right-hand side matrix operand
1899  , typename ST2 > // Type of the scalar value
1900  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1901  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1902  {
1903  selectDefaultAssignKernel( y, x, A, scalar );
1904  }
1905  //**********************************************************************************************
1906 
1907  //**BLAS-based assignment to dense vectors (single precision)***********************************
1908 #if BLAZE_BLAS_MODE
1909 
// BLAS kernel for the assignment y = (x * A) * scalar with single precision elements.
// Takes part in overload resolution through EnableIf on UseSinglePrecisionKernel.
//   y       target vector, written through y.data()
//   x       left-hand side vector operand
//   A       right-hand side matrix operand, handed to BLAS as column-major storage
//   scalar  scaling factor, passed to BLAS as alpha
// The dimensions are converted to int with boost::numeric_cast before the call.
1922  template< typename VT1 // Type of the left-hand side target vector
1923  , typename VT2 // Type of the left-hand side vector operand
1924  , typename MT1 // Type of the right-hand side matrix operand
1925  , typename ST2 > // Type of the scalar value
1926  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1927  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1928  {
1929  using boost::numeric_cast;
1930 
1931  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1932  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1933  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1934 
1935  const int M ( numeric_cast<int>( A.rows() ) );
1936  const int N ( numeric_cast<int>( A.columns() ) );
1937  const int lda( numeric_cast<int>( A.spacing() ) );
1938 
// alpha = scalar, beta = 0: the previous content of y does not enter the result.
1939  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
1940  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1941  }
1942 #endif
1943  //**********************************************************************************************
1944 
1945  //**BLAS-based assignment to dense vectors (double precision)***********************************
1946 #if BLAZE_BLAS_MODE
1947 
// BLAS kernel for the assignment y = (x * A) * scalar with double precision elements.
// Takes part in overload resolution through EnableIf on UseDoublePrecisionKernel.
//   y       target vector, written through y.data()
//   x       left-hand side vector operand
//   A       right-hand side matrix operand, handed to BLAS as column-major storage
//   scalar  scaling factor, passed to BLAS as alpha
// The dimensions are converted to int with boost::numeric_cast before the call.
1960  template< typename VT1 // Type of the left-hand side target vector
1961  , typename VT2 // Type of the left-hand side vector operand
1962  , typename MT1 // Type of the right-hand side matrix operand
1963  , typename ST2 > // Type of the scalar value
1964  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1965  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1966  {
1967  using boost::numeric_cast;
1968 
1969  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1970  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1971  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1972 
1973  const int M ( numeric_cast<int>( A.rows() ) );
1974  const int N ( numeric_cast<int>( A.columns() ) );
1975  const int lda( numeric_cast<int>( A.spacing() ) );
1976 
// alpha = scalar, beta = 0: the previous content of y does not enter the result.
1977  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
1978  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
1979  }
1980 #endif
1981  //**********************************************************************************************
1982 
1983  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1984 #if BLAZE_BLAS_MODE
1985 
// BLAS kernel for the assignment y = (x * A) * scalar with single precision complex elements.
// Takes part in overload resolution through EnableIf on UseSinglePrecisionComplexKernel.
//   y       target vector, written through y.data()
//   x       left-hand side vector operand
//   A       right-hand side matrix operand, handed to BLAS as column-major storage
//   scalar  scaling factor, converted to complex<float> and passed to BLAS as alpha
// The dimensions are converted to int with boost::numeric_cast before the call.
1999  template< typename VT1 // Type of the left-hand side target vector
2000  , typename VT2 // Type of the left-hand side vector operand
2001  , typename MT1 // Type of the right-hand side matrix operand
2002  , typename ST2 > // Type of the scalar value
2003  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2004  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2005  {
2006  using boost::numeric_cast;
2007 
2008  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2009  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2010  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2011  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2012  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2013  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2014 
2015  const int M ( numeric_cast<int>( A.rows() ) );
2016  const int N ( numeric_cast<int>( A.columns() ) );
2017  const int lda( numeric_cast<int>( A.spacing() ) );
2018  const complex<float> alpha( scalar );
2019  const complex<float> beta ( 0.0F, 0.0F );
2020 
// alpha = scalar, beta = (0,0): the previous content of y does not enter the result.
2021  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2022  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2023  }
2024 #endif
2025  //**********************************************************************************************
2026 
2027  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2028 #if BLAZE_BLAS_MODE
2029 
2043  template< typename VT1 // Type of the left-hand side target vector
2044  , typename VT2 // Type of the left-hand side vector operand
2045  , typename MT1 // Type of the right-hand side matrix operand
2046  , typename ST2 > // Type of the scalar value
2047  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2048  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2049  {
2050  using boost::numeric_cast;
2051 
2052  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2053  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2054  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2055  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2056  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2057  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2058 
2059  const int M ( numeric_cast<int>( A.rows() ) );
2060  const int N ( numeric_cast<int>( A.columns() ) );
2061  const int lda( numeric_cast<int>( A.spacing() ) );
2062  const complex<double> alpha( scalar );
2063  const complex<double> beta ( 0.0, 0.0 );
2064 
2065  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2066  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2067  }
2068 #endif
2069  //**********************************************************************************************
2070 
2071  //**Assignment to sparse vectors****************************************************************
2083  template< typename VT1 // Type of the target sparse vector
2084  , bool TF > // Transpose flag of the target sparse vector
2085  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2086  {
2088 
2091  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2092 
2093  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2094 
2095  const ResultType tmp( rhs );
2096  assign( ~lhs, tmp );
2097  }
2098  //**********************************************************************************************
2099 
2100  //**Addition assignment to dense vectors********************************************************
2112  template< typename VT1 // Type of the target dense vector
2113  , bool TF > // Transpose flag of the target dense vector
2114  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2115  {
2117 
2118  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2119 
2120  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2121  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2122 
2123  if( right.rows() == 0UL || right.columns() == 0UL ) {
2124  return;
2125  }
2126 
2127  LT x( left ); // Evaluation of the left-hand side dense vector operand
2128  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2129 
2130  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2131  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2132  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2133  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2134 
2135  if( ( IsComputation<MT>::value && !evaluate ) ||
2136  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2137  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2138  else
2139  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2140  }
2141  //**********************************************************************************************
2142 
2143  //**Default addition assignment to dense vectors************************************************
2157  template< typename VT1 // Type of the left-hand side target vector
2158  , typename VT2 // Type of the left-hand side vector operand
2159  , typename MT1 // Type of the right-hand side matrix operand
2160  , typename ST2 > // Type of the scalar value
2161  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2162  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2163  {
2164  y.addAssign( x * A * scalar );
2165  }
2166  //**********************************************************************************************
2167 
2168  //**Vectorized default addition assignment to dense vectors*************************************
2182  template< typename VT1 // Type of the left-hand side target vector
2183  , typename VT2 // Type of the left-hand side vector operand
2184  , typename MT1 // Type of the right-hand side matrix operand
2185  , typename ST2 > // Type of the scalar value
2186  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2187  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2188  {
2189  typedef IntrinsicTrait<ElementType> IT;
2190 
2191  const size_t M( A.rows() );
2192  const size_t N( A.columns() );
2193 
2194  size_t j( 0UL );
2195 
2196  for( ; (j+8UL) <= N; j+=8UL ) {
2197  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2198  for( size_t i=0UL; i<M; i+=IT::size ) {
2199  const IntrinsicType x1( x.get(i) );
2200  xmm1 = xmm1 + x1 * A.get(i,j );
2201  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2202  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2203  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2204  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2205  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2206  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2207  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2208  }
2209  y[j ] += sum( xmm1 ) * scalar;
2210  y[j+1UL] += sum( xmm2 ) * scalar;
2211  y[j+2UL] += sum( xmm3 ) * scalar;
2212  y[j+3UL] += sum( xmm4 ) * scalar;
2213  y[j+4UL] += sum( xmm5 ) * scalar;
2214  y[j+5UL] += sum( xmm6 ) * scalar;
2215  y[j+6UL] += sum( xmm7 ) * scalar;
2216  y[j+7UL] += sum( xmm8 ) * scalar;
2217  }
2218  for( ; (j+4UL) <= N; j+=4UL ) {
2219  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2220  for( size_t i=0UL; i<M; i+=IT::size ) {
2221  const IntrinsicType x1( x.get(i) );
2222  xmm1 = xmm1 + x1 * A.get(i,j );
2223  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2224  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2225  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2226  }
2227  y[j ] += sum( xmm1 ) * scalar;
2228  y[j+1UL] += sum( xmm2 ) * scalar;
2229  y[j+2UL] += sum( xmm3 ) * scalar;
2230  y[j+3UL] += sum( xmm4 ) * scalar;
2231  }
2232  for( ; (j+3UL) <= N; j+=3UL ) {
2233  IntrinsicType xmm1, xmm2, xmm3;
2234  for( size_t i=0UL; i<M; i+=IT::size ) {
2235  const IntrinsicType x1( x.get(i) );
2236  xmm1 = xmm1 + x1 * A.get(i,j );
2237  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2238  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2239  }
2240  y[j ] += sum( xmm1 ) * scalar;
2241  y[j+1UL] += sum( xmm2 ) * scalar;
2242  y[j+2UL] += sum( xmm3 ) * scalar;
2243  }
2244  for( ; (j+2UL) <= N; j+=2UL ) {
2245  IntrinsicType xmm1, xmm2;
2246  for( size_t i=0UL; i<M; i+=IT::size ) {
2247  const IntrinsicType x1( x.get(i) );
2248  xmm1 = xmm1 + x1 * A.get(i,j );
2249  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2250  }
2251  y[j ] += sum( xmm1 ) * scalar;
2252  y[j+1UL] += sum( xmm2 ) * scalar;
2253  }
2254  if( j < N ) {
2255  IntrinsicType xmm1;
2256  for( size_t i=0UL; i<M; i+=IT::size ) {
2257  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2258  }
2259  y[j] += sum( xmm1 ) * scalar;
2260  }
2261  }
2262  //**********************************************************************************************
2263 
2264  //**BLAS-based addition assignment to dense vectors (default)***********************************
2278  template< typename VT1 // Type of the left-hand side target vector
2279  , typename VT2 // Type of the left-hand side vector operand
2280  , typename MT1 // Type of the right-hand side matrix operand
2281  , typename ST2 > // Type of the scalar value
2282  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2283  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2284  {
2285  selectDefaultAddAssignKernel( y, x, A, scalar );
2286  }
2287  //**********************************************************************************************
2288 
2289  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2290 #if BLAZE_BLAS_MODE
2291 
2304  template< typename VT1 // Type of the left-hand side target vector
2305  , typename VT2 // Type of the left-hand side vector operand
2306  , typename MT1 // Type of the right-hand side matrix operand
2307  , typename ST2 > // Type of the scalar value
2308  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2309  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2310  {
2311  using boost::numeric_cast;
2312 
2313  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2314  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2315  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2316 
2317  const int M ( numeric_cast<int>( A.rows() ) );
2318  const int N ( numeric_cast<int>( A.columns() ) );
2319  const int lda( numeric_cast<int>( A.spacing() ) );
2320 
2321  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2322  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2323  }
2324 #endif
2325  //**********************************************************************************************
2326 
2327  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2328 #if BLAZE_BLAS_MODE
2329 
2342  template< typename VT1 // Type of the left-hand side target vector
2343  , typename VT2 // Type of the left-hand side vector operand
2344  , typename MT1 // Type of the right-hand side matrix operand
2345  , typename ST2 > // Type of the scalar value
2346  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2347  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2348  {
2349  using boost::numeric_cast;
2350 
2351  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2352  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2353  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2354 
2355  const int M ( numeric_cast<int>( A.rows() ) );
2356  const int N ( numeric_cast<int>( A.columns() ) );
2357  const int lda( numeric_cast<int>( A.spacing() ) );
2358 
2359  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2360  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2361  }
2362 #endif
2363  //**********************************************************************************************
2364 
2365  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2366 #if BLAZE_BLAS_MODE
2367 
2381  template< typename VT1 // Type of the left-hand side target vector
2382  , typename VT2 // Type of the left-hand side vector operand
2383  , typename MT1 // Type of the right-hand side matrix operand
2384  , typename ST2 > // Type of the scalar value
2385  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2386  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2387  {
2388  using boost::numeric_cast;
2389 
2390  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2391  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2392  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2393  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2394  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2395  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2396 
2397  const int M ( numeric_cast<int>( A.rows() ) );
2398  const int N ( numeric_cast<int>( A.columns() ) );
2399  const int lda( numeric_cast<int>( A.spacing() ) );
2400  const complex<float> alpha( scalar );
2401  const complex<float> beta ( 1.0F, 0.0F );
2402 
2403  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2404  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2405  }
2406 #endif
2407  //**********************************************************************************************
2408 
2409  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2410 #if BLAZE_BLAS_MODE
2411 
2425  template< typename VT1 // Type of the left-hand side target vector
2426  , typename VT2 // Type of the left-hand side vector operand
2427  , typename MT1 // Type of the right-hand side matrix operand
2428  , typename ST2 > // Type of the scalar value
2429  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2430  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2431  {
2432  using boost::numeric_cast;
2433 
2434  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2435  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2436  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2437  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2438  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2439  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2440 
2441  const int M ( numeric_cast<int>( A.rows() ) );
2442  const int N ( numeric_cast<int>( A.columns() ) );
2443  const int lda( numeric_cast<int>( A.spacing() ) );
2444  const complex<double> alpha( scalar );
2445  const complex<double> beta ( 1.0, 0.0 );
2446 
2447  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2448  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2449  }
2450 #endif
2451  //**********************************************************************************************
2452 
2453  //**Addition assignment to sparse vectors*******************************************************
2454  // No special implementation for the addition assignment to sparse vectors.
2455  //**********************************************************************************************
2456 
2457  //**Subtraction assignment to dense vectors*****************************************************
2469  template< typename VT1 // Type of the target dense vector
2470  , bool TF > // Transpose flag of the target dense vector
2471  friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2472  {
2474 
2475  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2476 
2477  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2478  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2479 
2480  if( right.rows() == 0UL || right.columns() == 0UL ) {
2481  return;
2482  }
2483 
2484  LT x( left ); // Evaluation of the left-hand side dense vector operand
2485  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2486 
2487  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2488  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2489  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2490  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2491 
2492  if( ( IsComputation<MT>::value && !evaluate ) ||
2493  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2494  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2495  else
2496  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2497  }
2498  //**********************************************************************************************
2499 
2500  //**Default subtraction assignment to dense vectors*********************************************
2514  template< typename VT1 // Type of the left-hand side target vector
2515  , typename VT2 // Type of the left-hand side vector operand
2516  , typename MT1 // Type of the right-hand side matrix operand
2517  , typename ST2 > // Type of the scalar value
2518  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2519  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2520  {
2521  y.subAssign( x * A * scalar );
2522  }
2523  //**********************************************************************************************
2524 
2525  //**Vectorized default subtraction assignment to dense vectors**********************************
2539  template< typename VT1 // Type of the left-hand side target vector
2540  , typename VT2 // Type of the left-hand side vector operand
2541  , typename MT1 // Type of the right-hand side matrix operand
2542  , typename ST2 > // Type of the scalar value
2543  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2544  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2545  {
2546  typedef IntrinsicTrait<ElementType> IT;
2547 
2548  const size_t M( A.rows() );
2549  const size_t N( A.columns() );
2550 
2551  size_t j( 0UL );
2552 
2553  for( ; (j+8UL) <= N; j+=8UL ) {
2554  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2555  for( size_t i=0UL; i<M; i+=IT::size ) {
2556  const IntrinsicType x1( x.get(i) );
2557  xmm1 = xmm1 + x1 * A.get(i,j );
2558  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2559  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2560  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2561  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2562  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2563  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2564  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2565  }
2566  y[j ] -= sum( xmm1 ) * scalar;
2567  y[j+1UL] -= sum( xmm2 ) * scalar;
2568  y[j+2UL] -= sum( xmm3 ) * scalar;
2569  y[j+3UL] -= sum( xmm4 ) * scalar;
2570  y[j+4UL] -= sum( xmm5 ) * scalar;
2571  y[j+5UL] -= sum( xmm6 ) * scalar;
2572  y[j+6UL] -= sum( xmm7 ) * scalar;
2573  y[j+7UL] -= sum( xmm8 ) * scalar;
2574  }
2575  for( ; (j+4UL) <= N; j+=4UL ) {
2576  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2577  for( size_t i=0UL; i<M; i+=IT::size ) {
2578  const IntrinsicType x1( x.get(i) );
2579  xmm1 = xmm1 + x1 * A.get(i,j );
2580  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2581  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2582  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2583  }
2584  y[j ] -= sum( xmm1 ) * scalar;
2585  y[j+1UL] -= sum( xmm2 ) * scalar;
2586  y[j+2UL] -= sum( xmm3 ) * scalar;
2587  y[j+3UL] -= sum( xmm4 ) * scalar;
2588  }
2589  for( ; (j+3UL) <= N; j+=3UL ) {
2590  IntrinsicType xmm1, xmm2, xmm3;
2591  for( size_t i=0UL; i<M; i+=IT::size ) {
2592  const IntrinsicType x1( x.get(i) );
2593  xmm1 = xmm1 + x1 * A.get(i,j );
2594  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2595  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2596  }
2597  y[j ] -= sum( xmm1 ) * scalar;
2598  y[j+1UL] -= sum( xmm2 ) * scalar;
2599  y[j+2UL] -= sum( xmm3 ) * scalar;
2600  }
2601  for( ; (j+2UL) <= N; j+=2UL ) {
2602  IntrinsicType xmm1, xmm2;
2603  for( size_t i=0UL; i<M; i+=IT::size ) {
2604  const IntrinsicType x1( x.get(i) );
2605  xmm1 = xmm1 + x1 * A.get(i,j );
2606  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2607  }
2608  y[j ] -= sum( xmm1 ) * scalar;
2609  y[j+1UL] -= sum( xmm2 ) * scalar;
2610  }
2611  if( j < N ) {
2612  IntrinsicType xmm1;
2613  for( size_t i=0UL; i<M; i+=IT::size ) {
2614  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2615  }
2616  y[j] -= sum( xmm1 ) * scalar;
2617  }
2618  }
2619  //**********************************************************************************************
2620 
2621  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2636  template< typename VT1 // Type of the left-hand side target vector
2637  , typename VT2 // Type of the left-hand side vector operand
2638  , typename MT1 // Type of the right-hand side matrix operand
2639  , typename ST2 > // Type of the scalar value
2640  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2641  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2642  {
2643  selectDefaultSubAssignKernel( y, x, A, scalar );
2644  }
2645  //**********************************************************************************************
2646 
2647  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2648 #if BLAZE_BLAS_MODE
2649 
2662  template< typename VT1 // Type of the left-hand side target vector
2663  , typename VT2 // Type of the left-hand side vector operand
2664  , typename MT1 // Type of the right-hand side matrix operand
2665  , typename ST2 > // Type of the scalar value
2666  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2667  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2668  {
2669  using boost::numeric_cast;
2670 
2671  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2672  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2673  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2674 
2675  const int M ( numeric_cast<int>( A.rows() ) );
2676  const int N ( numeric_cast<int>( A.columns() ) );
2677  const int lda( numeric_cast<int>( A.spacing() ) );
2678 
2679  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2680  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2681  }
2682 #endif
2683  //**********************************************************************************************
2684 
2685  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2686 #if BLAZE_BLAS_MODE
2687 
2700  template< typename VT1 // Type of the left-hand side target vector
2701  , typename VT2 // Type of the left-hand side vector operand
2702  , typename MT1 // Type of the right-hand side matrix operand
2703  , typename ST2 > // Type of the scalar value
2704  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2705  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2706  {
2707  using boost::numeric_cast;
2708 
2709  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2710  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2711  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2712 
2713  const int M ( numeric_cast<int>( A.rows() ) );
2714  const int N ( numeric_cast<int>( A.columns() ) );
2715  const int lda( numeric_cast<int>( A.spacing() ) );
2716 
2717  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2718  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2719  }
2720 #endif
2721  //**********************************************************************************************
2722 
2723  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2724 #if BLAZE_BLAS_MODE
2725 
2740  template< typename VT1 // Type of the left-hand side target vector
2741  , typename VT2 // Type of the left-hand side vector operand
2742  , typename MT1 // Type of the right-hand side matrix operand
2743  , typename ST2 > // Type of the scalar value
2744  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2745  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2746  {
2747  using boost::numeric_cast;
2748 
2749  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2750  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2751  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2752  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2753  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2754  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2755 
2756  const int M ( numeric_cast<int>( A.rows() ) );
2757  const int N ( numeric_cast<int>( A.columns() ) );
2758  const int lda( numeric_cast<int>( A.spacing() ) );
2759  const complex<float> alpha( -scalar );
2760  const complex<float> beta ( 1.0F, 0.0F );
2761 
2762  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2763  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2764  }
2765 #endif
2766  //**********************************************************************************************
2767 
2768  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2769 #if BLAZE_BLAS_MODE
2770 
2785  template< typename VT1 // Type of the left-hand side target vector
2786  , typename VT2 // Type of the left-hand side vector operand
2787  , typename MT1 // Type of the right-hand side matrix operand
2788  , typename ST2 > // Type of the scalar value
2789  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2790  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2791  {
2792  using boost::numeric_cast;
2793 
2794  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2795  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2796  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2797  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2800 
2801  const int M ( numeric_cast<int>( A.rows() ) );
2802  const int N ( numeric_cast<int>( A.columns() ) );
2803  const int lda( numeric_cast<int>( A.spacing() ) );
2804  const complex<double> alpha( -scalar );
2805  const complex<double> beta ( 1.0, 0.0 );
2806 
2807  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2808  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2809  }
2810 #endif
2811  //**********************************************************************************************
2812 
2813  //**Subtraction assignment to sparse vectors****************************************************
2814  // No special implementation for the subtraction assignment to sparse vectors.
2815  //**********************************************************************************************
2816 
2817  //**Multiplication assignment to dense vectors**************************************************
2829  template< typename VT1 // Type of the target dense vector
2830  , bool TF > // Transpose flag of the target dense vector
2831  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2832  {
2834 
2837  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2838 
2839  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2840 
2841  const ResultType tmp( rhs );
2842  multAssign( ~lhs, tmp );
2843  }
2844  //**********************************************************************************************
2845 
2846  //**Multiplication assignment to sparse vectors*******************************************************
2847  // No special implementation for the multiplication assignment to sparse vectors.
2848  //**********************************************************************************************
2849 
2850  //**Compile time checks*************************************************************************
2859  //**********************************************************************************************
2860 };
2862 //*************************************************************************************************
2863 
2864 
2865 
2866 
2867 //=================================================================================================
2868 //
2869 // GLOBAL BINARY ARITHMETIC OPERATORS
2870 //
2871 //=================================================================================================
2872 
2873 //*************************************************************************************************
2904 template< typename T1 // Type of the left-hand side dense vector
2905  , typename T2 > // Type of the right-hand side dense matrix
2906 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
2908 {
2910 
2911  if( (~vec).size() != (~mat).rows() )
2912  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2913 
2914  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
2915 }
2916 //*************************************************************************************************
2917 
2918 } // namespace blaze
2919 
2920 #endif