All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
49 #include <blaze/system/BLAS.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/Complex.h>
57 #include <blaze/util/DisableIf.h>
58 #include <blaze/util/EnableIf.h>
59 #include <blaze/util/SelectType.h>
60 #include <blaze/util/Types.h>
66 
67 
68 namespace blaze {
69 
70 //=================================================================================================
71 //
72 // CLASS TDVECTDMATMULTEXPR
73 //
74 //=================================================================================================
75 
76 //*************************************************************************************************
83 template< typename VT // Type of the left-hand side dense vector
84  , typename MT > // Type of the right-hand side dense matrix
85 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
86  , private Expression
87  , private Computation
88 {
89  private:
90  //**Type definitions****************************************************************************
// Result type of the left-hand side dense vector operand (VT).
91  typedef typename VT::ResultType VRT;
// Result type of the right-hand side dense matrix operand (MT).
92  typedef typename MT::ResultType MRT;
// Element type of the vector operand's result type.
93  typedef typename VRT::ElementType VET;
// Element type of the matrix operand's result type.
94  typedef typename MRT::ElementType MET;
// Composite type of the left-hand side dense vector operand.
95  typedef typename VT::CompositeType VCT;
// Composite type of the right-hand side dense matrix operand.
96  typedef typename MT::CompositeType MCT;
97  //**********************************************************************************************
98 
99  //**********************************************************************************************
101  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
103  //**********************************************************************************************
104 
105  //**********************************************************************************************
107 
108 
111  template< typename T1, typename T2, typename T3 >
112  struct UseSinglePrecisionKernel {
116  };
118  //**********************************************************************************************
119 
120  //**********************************************************************************************
122 
123 
126  template< typename T1, typename T2, typename T3 >
127  struct UseDoublePrecisionKernel {
128  enum { value = IsDouble<typename T1::ElementType>::value &&
129  IsDouble<typename T2::ElementType>::value &&
130  IsDouble<typename T3::ElementType>::value };
131  };
133  //**********************************************************************************************
134 
135  //**********************************************************************************************
137 
138 
141  template< typename T1, typename T2, typename T3 >
142  struct UseSinglePrecisionComplexKernel {
143  typedef complex<float> Type;
144  enum { value = IsSame<typename T1::ElementType,Type>::value &&
145  IsSame<typename T2::ElementType,Type>::value &&
146  IsSame<typename T3::ElementType,Type>::value };
147  };
149  //**********************************************************************************************
150 
151  //**********************************************************************************************
153 
154 
157  template< typename T1, typename T2, typename T3 >
158  struct UseDoublePrecisionComplexKernel {
159  typedef complex<double> Type;
160  enum { value = IsSame<typename T1::ElementType,Type>::value &&
161  IsSame<typename T2::ElementType,Type>::value &&
162  IsSame<typename T3::ElementType,Type>::value };
163  };
165  //**********************************************************************************************
166 
167  //**********************************************************************************************
169 
170 
172  template< typename T1, typename T2, typename T3 >
173  struct UseDefaultKernel {
174  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
175  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
176  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
177  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
178  };
180  //**********************************************************************************************
181 
182  //**********************************************************************************************
184 
185 
188  template< typename T1, typename T2, typename T3 >
189  struct UseVectorizedDefaultKernel {
190  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
193  IntrinsicTrait<typename T1::ElementType>::addition &&
194  IntrinsicTrait<typename T1::ElementType>::multiplication };
195  };
197  //**********************************************************************************************
198 
199  public:
200  //**Type definitions****************************************************************************
// NOTE(review): ResultType is used below but its declaration is not part of this listing.
// Transpose type of the result type.
203  typedef typename ResultType::TransposeType TransposeType;
// Element type of the result type.
204  typedef typename ResultType::ElementType ElementType;
// Type returned (by value) from the subscript operator.
206  typedef const ElementType ReturnType;
// Composite type of this expression: a constant result type.
207  typedef const ResultType CompositeType;
208 
// Type of the stored left-hand side operand; presumably SelectType picks 'const VT' when VT
// is itself an expression and 'const VT&' otherwise -- confirm against SelectType.
210  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
211 
// Type of the stored right-hand side operand; same selection scheme as for LeftOperand.
213  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
214 
// Type used by the assignment functions to hold the (possibly evaluated) left-hand side operand.
216  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
217 
220  //**********************************************************************************************
221 
222  //**Compilation flags***************************************************************************
224  enum { vectorizable = 0 };
225 
227  enum { canAlias = ( !IsComputation<VT>::value ) ||
228  ( !evaluate && IsComputation<MT>::value &&
230  //**********************************************************************************************
231 
232  //**Constructor*********************************************************************************
238  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
239  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
240  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
241  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
242  {
243  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
244  }
245  //**********************************************************************************************
246 
247  //**Subscript operator**************************************************************************
253  inline ReturnType operator[]( size_t index ) const {
254  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
255 
256  ElementType res;
257 
258  if( mat_.rows() != 0UL ) {
259  res = vec_[0UL] * mat_(0UL,index);
260  for( size_t j=1UL; j<end_; j+=2UL ) {
261  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
262  }
263  if( end_ < mat_.rows() ) {
264  res += vec_[end_] * mat_(end_,index);
265  }
266  }
267  else {
268  reset( res );
269  }
270 
271  return res;
272  }
273  //**********************************************************************************************
274 
275  //**Size function*******************************************************************************
280  inline size_t size() const {
281  return mat_.columns();
282  }
283  //**********************************************************************************************
284 
285  //**Left operand access*************************************************************************
290  inline LeftOperand leftOperand() const {
291  return vec_;
292  }
293  //**********************************************************************************************
294 
295  //**Right operand access************************************************************************
300  inline RightOperand rightOperand() const {
301  return mat_;
302  }
303  //**********************************************************************************************
304 
305  //**********************************************************************************************
311  template< typename T >
312  inline bool isAliased( const T* alias ) const {
313  return ( !IsComputation<VT>::value && vec_.isAliased( alias ) ) ||
315  CanAlias<MT>::value && mat_.isAliased( alias ) );
316  }
317  //**********************************************************************************************
318 
319  private:
320  //**Member variables****************************************************************************
323  const size_t end_;
324  //**********************************************************************************************
325 
326  //**Assignment to dense vectors*****************************************************************
339  template< typename VT1 > // Type of the target dense vector
340  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
341  {
342  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
343 
344  if( rhs.mat_.rows() == 0UL ) {
345  reset( ~lhs );
346  return;
347  }
348  else if( rhs.mat_.columns() == 0UL ) {
349  return;
350  }
351 
352  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
353  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
354 
355  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
356  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
357  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
358  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
359 
360  if( ( IsComputation<MT>::value && !evaluate ) ||
361  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
362  TDVecTDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
363  else
364  TDVecTDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
365  }
367  //**********************************************************************************************
368 
369  //**Default assignment to dense vectors*********************************************************
383  template< typename VT1 // Type of the left-hand side target vector
384  , typename VT2 // Type of the left-hand side vector operand
385  , typename MT1 > // Type of the right-hand side matrix operand
386  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
387  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
388  {
389  y.assign( x * A );
390  }
392  //**********************************************************************************************
393 
394  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment kernel for y = x * A.
//
// Enabled only when UseVectorizedDefaultKernel holds for the three given types. The columns
// of A are processed in blocks of 8, 4, 3 and 2, followed by at most one remaining column.
// For every column an intrinsic accumulator collects the products of x and the column, and
// the horizontal sum of the accumulator is stored in the matching element of y.
//
// NOTE(review): the accumulators are only default-constructed before being added to; this
// presumably relies on IntrinsicType's default constructor zero-initializing -- confirm.
// NOTE(review): the row loops advance by IT::size with no remainder handling; presumably the
// operands are padded to a multiple of IT::size -- confirm.
408  template< typename VT1 // Type of the left-hand side target vector
409  , typename VT2 // Type of the left-hand side vector operand
410  , typename MT1 > // Type of the right-hand side matrix operand
411  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
412  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
413  {
414  typedef IntrinsicTrait<ElementType> IT;
415 
416  const size_t M( A.rows() );
417  const size_t N( A.columns() );
418 
// Index of the first column that has not been processed yet; shared by all loops below.
419  size_t j( 0UL );
420 
// Blocks of eight columns: each chunk of x is loaded once and reused for all eight columns.
421  for( ; (j+8UL) <= N; j+=8UL ) {
422  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
423  for( size_t i=0UL; i<M; i+=IT::size ) {
424  const IntrinsicType x1( x.get(i) );
425  xmm1 = xmm1 + x1 * A.get(i,j );
426  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
427  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
428  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
429  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
430  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
431  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
432  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
433  }
434  y[j ] = sum( xmm1 );
435  y[j+1UL] = sum( xmm2 );
436  y[j+2UL] = sum( xmm3 );
437  y[j+3UL] = sum( xmm4 );
438  y[j+4UL] = sum( xmm5 );
439  y[j+5UL] = sum( xmm6 );
440  y[j+6UL] = sum( xmm7 );
441  y[j+7UL] = sum( xmm8 );
442  }
// Blocks of four columns.
443  for( ; (j+4UL) <= N; j+=4UL ) {
444  IntrinsicType xmm1, xmm2, xmm3, xmm4;
445  for( size_t i=0UL; i<M; i+=IT::size ) {
446  const IntrinsicType x1( x.get(i) );
447  xmm1 = xmm1 + x1 * A.get(i,j );
448  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
449  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
450  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
451  }
452  y[j ] = sum( xmm1 );
453  y[j+1UL] = sum( xmm2 );
454  y[j+2UL] = sum( xmm3 );
455  y[j+3UL] = sum( xmm4 );
456  }
// Blocks of three columns.
457  for( ; (j+3UL) <= N; j+=3UL ) {
458  IntrinsicType xmm1, xmm2, xmm3;
459  for( size_t i=0UL; i<M; i+=IT::size ) {
460  const IntrinsicType x1( x.get(i) );
461  xmm1 = xmm1 + x1 * A.get(i,j );
462  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
463  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
464  }
465  y[j ] = sum( xmm1 );
466  y[j+1UL] = sum( xmm2 );
467  y[j+2UL] = sum( xmm3 );
468  }
// Blocks of two columns.
469  for( ; (j+2UL) <= N; j+=2UL ) {
470  IntrinsicType xmm1, xmm2;
471  for( size_t i=0UL; i<M; i+=IT::size ) {
472  const IntrinsicType x1( x.get(i) );
473  xmm1 = xmm1 + x1 * A.get(i,j );
474  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
475  }
476  y[j ] = sum( xmm1 );
477  y[j+1UL] = sum( xmm2 );
478  }
// At most one column can be left after the two-column loop.
479  if( j < N ) {
480  IntrinsicType xmm1;
481  for( size_t i=0UL; i<M; i+=IT::size ) {
482  xmm1 = xmm1 + A.get(i,j) * x.get(i);
483  }
484  y[j] = sum( xmm1 );
485  }
486  }
488  //**********************************************************************************************
489 
490  //**BLAS-based assignment to dense vectors (default)********************************************
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename VT2 // Type of the left-hand side vector operand
506  , typename MT1 > // Type of the right-hand side matrix operand
507  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
508  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
509  {
510  selectDefaultAssignKernel( y, x, A );
511  }
513  //**********************************************************************************************
514 
515  //**BLAS-based assignment to dense vectors (single precision)***********************************
516 #if BLAZE_BLAS_MODE
517 
530  template< typename VT1 // Type of the left-hand side target vector
531  , typename VT2 // Type of the left-hand side vector operand
532  , typename MT1 > // Type of the right-hand side matrix operand
533  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
534  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
535  {
536  using boost::numeric_cast;
537 
538  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
539  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
540  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
541 
542  const int M ( numeric_cast<int>( A.rows() ) );
543  const int N ( numeric_cast<int>( A.columns() ) );
544  const int lda( numeric_cast<int>( A.spacing() ) );
545 
546  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
547  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
548  }
550 #endif
551  //**********************************************************************************************
552 
553  //**BLAS-based assignment to dense vectors (double precision)***********************************
554 #if BLAZE_BLAS_MODE
555 
568  template< typename VT1 // Type of the left-hand side target vector
569  , typename VT2 // Type of the left-hand side vector operand
570  , typename MT1 > // Type of the right-hand side matrix operand
571  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
572  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
573  {
574  using boost::numeric_cast;
575 
576  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
577  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
578  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
579 
580  const int M ( numeric_cast<int>( A.rows() ) );
581  const int N ( numeric_cast<int>( A.columns() ) );
582  const int lda( numeric_cast<int>( A.spacing() ) );
583 
584  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
585  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
586  }
588 #endif
589  //**********************************************************************************************
590 
591  //**BLAS-based assignment to dense vectors (single precision complex)***************************
592 #if BLAZE_BLAS_MODE
593 
606  template< typename VT1 // Type of the left-hand side target vector
607  , typename VT2 // Type of the left-hand side vector operand
608  , typename MT1 > // Type of the right-hand side matrix operand
609  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
610  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
611  {
612  using boost::numeric_cast;
613 
614  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
615  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
616  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
617  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
618  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
619  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
620 
621  const int M ( numeric_cast<int>( A.rows() ) );
622  const int N ( numeric_cast<int>( A.columns() ) );
623  const int lda( numeric_cast<int>( A.spacing() ) );
624  const complex<float> alpha( 1.0F, 0.0F );
625  const complex<float> beta ( 0.0F, 0.0F );
626 
627  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
628  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
629  }
631 #endif
632  //**********************************************************************************************
633 
634  //**BLAS-based assignment to dense vectors (double precision complex)***************************
635 #if BLAZE_BLAS_MODE
636 
649  template< typename VT1 // Type of the left-hand side target vector
650  , typename VT2 // Type of the left-hand side vector operand
651  , typename MT1 > // Type of the right-hand side matrix operand
652  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
653  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
654  {
655  using boost::numeric_cast;
656 
657  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
658  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
659  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
660  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
661  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
662  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
663 
664  const int M ( numeric_cast<int>( A.rows() ) );
665  const int N ( numeric_cast<int>( A.columns() ) );
666  const int lda( numeric_cast<int>( A.spacing() ) );
667  const complex<double> alpha( 1.0, 0.0 );
668  const complex<double> beta ( 0.0, 0.0 );
669 
670  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
671  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
672  }
674 #endif
675  //**********************************************************************************************
676 
677  //**Assignment to sparse vectors****************************************************************
// Assignment of the multiplication expression to a sparse vector.
//
// The expression is first evaluated into a temporary of type ResultType, which is then
// assigned to the target vector.
// NOTE(review): the listing appears to omit statements at the top of the body -- check the
// original header before editing this function.
690  template< typename VT1 > // Type of the target sparse vector
691  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
692  {
695  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
696 
697  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
698 
// Evaluate the whole product once, then assign the evaluated temporary.
699  const ResultType tmp( rhs );
700  assign( ~lhs, tmp );
701  }
703  //**********************************************************************************************
704 
705  //**Addition assignment to dense vectors********************************************************
718  template< typename VT1 > // Type of the target dense vector
719  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
720  {
721  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
722 
723  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
724  return;
725  }
726 
727  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
728  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
729 
730  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
731  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
732  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
733  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
734 
735  if( ( IsComputation<MT>::value && !evaluate ) ||
736  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
737  TDVecTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
738  else
739  TDVecTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
740  }
742  //**********************************************************************************************
743 
744  //**Default addition assignment to dense vectors************************************************
758  template< typename VT1 // Type of the left-hand side target vector
759  , typename VT2 // Type of the left-hand side vector operand
760  , typename MT1 > // Type of the right-hand side matrix operand
761  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
762  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
763  {
764  y.addAssign( x * A );
765  }
767  //**********************************************************************************************
768 
769  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel for y += x * A.
//
// Enabled only when UseVectorizedDefaultKernel holds for the three given types. The columns
// of A are processed in blocks of 8, 4, 3 and 2, followed by at most one remaining column.
// For every column an intrinsic accumulator collects the products of x and the column, and
// the horizontal sum of the accumulator is added to the matching element of y.
//
// NOTE(review): the accumulators are only default-constructed before being added to; this
// presumably relies on IntrinsicType's default constructor zero-initializing -- confirm.
// NOTE(review): the row loops advance by IT::size with no remainder handling; presumably the
// operands are padded to a multiple of IT::size -- confirm.
783  template< typename VT1 // Type of the left-hand side target vector
784  , typename VT2 // Type of the left-hand side vector operand
785  , typename MT1 > // Type of the right-hand side matrix operand
786  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
787  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
788  {
789  typedef IntrinsicTrait<ElementType> IT;
790 
791  const size_t M( A.rows() );
792  const size_t N( A.columns() );
793 
// Index of the first column that has not been processed yet; shared by all loops below.
794  size_t j( 0UL );
795 
// Blocks of eight columns: each chunk of x is loaded once and reused for all eight columns.
796  for( ; (j+8UL) <= N; j+=8UL ) {
797  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
798  for( size_t i=0UL; i<M; i+=IT::size ) {
799  const IntrinsicType x1( x.get(i) );
800  xmm1 = xmm1 + x1 * A.get(i,j );
801  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
802  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
803  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
804  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
805  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
806  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
807  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
808  }
809  y[j ] += sum( xmm1 );
810  y[j+1UL] += sum( xmm2 );
811  y[j+2UL] += sum( xmm3 );
812  y[j+3UL] += sum( xmm4 );
813  y[j+4UL] += sum( xmm5 );
814  y[j+5UL] += sum( xmm6 );
815  y[j+6UL] += sum( xmm7 );
816  y[j+7UL] += sum( xmm8 );
817  }
// Blocks of four columns.
818  for( ; (j+4UL) <= N; j+=4UL ) {
819  IntrinsicType xmm1, xmm2, xmm3, xmm4;
820  for( size_t i=0UL; i<M; i+=IT::size ) {
821  const IntrinsicType x1( x.get(i) );
822  xmm1 = xmm1 + x1 * A.get(i,j );
823  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
824  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
825  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
826  }
827  y[j ] += sum( xmm1 );
828  y[j+1UL] += sum( xmm2 );
829  y[j+2UL] += sum( xmm3 );
830  y[j+3UL] += sum( xmm4 );
831  }
// Blocks of three columns.
832  for( ; (j+3UL) <= N; j+=3UL ) {
833  IntrinsicType xmm1, xmm2, xmm3;
834  for( size_t i=0UL; i<M; i+=IT::size ) {
835  const IntrinsicType x1( x.get(i) );
836  xmm1 = xmm1 + x1 * A.get(i,j );
837  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
838  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
839  }
840  y[j ] += sum( xmm1 );
841  y[j+1UL] += sum( xmm2 );
842  y[j+2UL] += sum( xmm3 );
843  }
// Blocks of two columns.
844  for( ; (j+2UL) <= N; j+=2UL ) {
845  IntrinsicType xmm1, xmm2;
846  for( size_t i=0UL; i<M; i+=IT::size ) {
847  const IntrinsicType x1( x.get(i) );
848  xmm1 = xmm1 + x1 * A.get(i,j );
849  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
850  }
851  y[j ] += sum( xmm1 );
852  y[j+1UL] += sum( xmm2 );
853  }
// At most one column can be left after the two-column loop.
854  if( j < N ) {
855  IntrinsicType xmm1;
856  for( size_t i=0UL; i<M; i+=IT::size ) {
857  xmm1 = xmm1 + A.get(i,j) * x.get(i);
858  }
859  y[j] += sum( xmm1 );
860  }
861  }
863  //**********************************************************************************************
864 
865  //**BLAS-based addition assignment to dense vectors (default)***********************************
879  template< typename VT1 // Type of the left-hand side target vector
880  , typename VT2 // Type of the left-hand side vector operand
881  , typename MT1 > // Type of the right-hand side matrix operand
882  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
883  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
884  {
885  selectDefaultAddAssignKernel( y, x, A );
886  }
888  //**********************************************************************************************
889 
890  //**BLAS-based addition assignment to dense vectors (single precision)**************************
891 #if BLAZE_BLAS_MODE
892 
905  template< typename VT1 // Type of the left-hand side target vector
906  , typename VT2 // Type of the left-hand side vector operand
907  , typename MT1 > // Type of the right-hand side matrix operand
908  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
909  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
910  {
911  using boost::numeric_cast;
912 
913  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
914  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
915  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
916 
917  const int M ( numeric_cast<int>( A.rows() ) );
918  const int N ( numeric_cast<int>( A.columns() ) );
919  const int lda( numeric_cast<int>( A.spacing() ) );
920 
921  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
922  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
923  }
925 #endif
926  //**********************************************************************************************
927 
928  //**BLAS-based addition assignment to dense vectors (double precision)**************************
929 #if BLAZE_BLAS_MODE
930 
943  template< typename VT1 // Type of the left-hand side target vector
944  , typename VT2 // Type of the left-hand side vector operand
945  , typename MT1 > // Type of the right-hand side matrix operand
946  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
947  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
948  {
949  using boost::numeric_cast;
950 
951  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
952  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
953  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
954 
955  const int M ( numeric_cast<int>( A.rows() ) );
956  const int N ( numeric_cast<int>( A.columns() ) );
957  const int lda( numeric_cast<int>( A.spacing() ) );
958 
959  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
960  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
961  }
963 #endif
964  //**********************************************************************************************
965 
966  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
967 #if BLAZE_BLAS_MODE
968 
981  template< typename VT1 // Type of the left-hand side target vector
982  , typename VT2 // Type of the left-hand side vector operand
983  , typename MT1 > // Type of the right-hand side matrix operand
984  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
985  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
986  {
987  using boost::numeric_cast;
988 
989  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
990  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
991  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
992  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
993  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
994  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
995 
996  const int M ( numeric_cast<int>( A.rows() ) );
997  const int N ( numeric_cast<int>( A.columns() ) );
998  const int lda( numeric_cast<int>( A.spacing() ) );
999  const complex<float> alpha( 1.0F, 0.0F );
1000  const complex<float> beta ( 1.0F, 0.0F );
1001 
1002  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1003  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1004  }
1006 #endif
1007  //**********************************************************************************************
1008 
1009  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1010 #if BLAZE_BLAS_MODE
1011 
1024  template< typename VT1 // Type of the left-hand side target vector
1025  , typename VT2 // Type of the left-hand side vector operand
1026  , typename MT1 > // Type of the right-hand side matrix operand
1027  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1028  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1029  {
1030  using boost::numeric_cast;
1031 
1032  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1033  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1034  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1035  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1036  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1037  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1038 
1039  const int M ( numeric_cast<int>( A.rows() ) );
1040  const int N ( numeric_cast<int>( A.columns() ) );
1041  const int lda( numeric_cast<int>( A.spacing() ) );
1042  const complex<double> alpha( 1.0, 0.0 );
1043  const complex<double> beta ( 1.0, 0.0 );
1044 
1045  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1046  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1047  }
1049 #endif
1050  //**********************************************************************************************
1051 
1052  //**Addition assignment to sparse vectors*******************************************************
1053  // No special implementation for the addition assignment to sparse vectors.
1054  //**********************************************************************************************
1055 
1056  //**Subtraction assignment to dense vectors*****************************************************
1069  template< typename VT1 > // Type of the target dense vector
1070  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1071  {
1072  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1073 
1074  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1075  return;
1076  }
1077 
1078  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1079  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1080 
1081  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1082  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1083  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1084  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1085 
1086  if( ( IsComputation<MT>::value && !evaluate ) ||
1087  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1088  TDVecTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1089  else
1090  TDVecTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1091  }
1093  //**********************************************************************************************
1094 
1095  //**Default subtraction assignment to dense vectors*********************************************
1109  template< typename VT1 // Type of the left-hand side target vector
1110  , typename VT2 // Type of the left-hand side vector operand
1111  , typename MT1 > // Type of the right-hand side matrix operand
1112  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1113  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1114  {
1115  y.subAssign( x * A );
1116  }
1118  //**********************************************************************************************
1119 
1120  //**Vectorized default subtraction assignment to dense vectors**********************************
1134  template< typename VT1 // Type of the left-hand side target vector
1135  , typename VT2 // Type of the left-hand side vector operand
1136  , typename MT1 > // Type of the right-hand side matrix operand
1137  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1138  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1139  {
1140  typedef IntrinsicTrait<ElementType> IT;
1141 
1142  const size_t M( A.rows() );
1143  const size_t N( A.columns() );
1144 
1145  size_t j( 0UL );
1146 
1147  for( ; (j+8UL) <= N; j+=8UL ) {
1148  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1149  for( size_t i=0UL; i<M; i+=IT::size ) {
1150  const IntrinsicType x1( x.get(i) );
1151  xmm1 = xmm1 + x1 * A.get(i,j );
1152  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1153  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1154  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1155  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1156  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1157  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1158  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1159  }
1160  y[j ] -= sum( xmm1 );
1161  y[j+1UL] -= sum( xmm2 );
1162  y[j+2UL] -= sum( xmm3 );
1163  y[j+3UL] -= sum( xmm4 );
1164  y[j+4UL] -= sum( xmm5 );
1165  y[j+5UL] -= sum( xmm6 );
1166  y[j+6UL] -= sum( xmm7 );
1167  y[j+7UL] -= sum( xmm8 );
1168  }
1169  for( ; (j+4UL) <= N; j+=4UL ) {
1170  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1171  for( size_t i=0UL; i<M; i+=IT::size ) {
1172  const IntrinsicType x1( x.get(i) );
1173  xmm1 = xmm1 + x1 * A.get(i,j );
1174  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1175  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1176  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1177  }
1178  y[j ] -= sum( xmm1 );
1179  y[j+1UL] -= sum( xmm2 );
1180  y[j+2UL] -= sum( xmm3 );
1181  y[j+3UL] -= sum( xmm4 );
1182  }
1183  for( ; (j+3UL) <= N; j+=3UL ) {
1184  IntrinsicType xmm1, xmm2, xmm3;
1185  for( size_t i=0UL; i<M; i+=IT::size ) {
1186  const IntrinsicType x1( x.get(i) );
1187  xmm1 = xmm1 + x1 * A.get(i,j );
1188  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1189  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1190  }
1191  y[j ] -= sum( xmm1 );
1192  y[j+1UL] -= sum( xmm2 );
1193  y[j+2UL] -= sum( xmm3 );
1194  }
1195  for( ; (j+2UL) <= N; j+=2UL ) {
1196  IntrinsicType xmm1, xmm2;
1197  for( size_t i=0UL; i<M; i+=IT::size ) {
1198  const IntrinsicType x1( x.get(i) );
1199  xmm1 = xmm1 + x1 * A.get(i,j );
1200  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1201  }
1202  y[j ] -= sum( xmm1 );
1203  y[j+1UL] -= sum( xmm2 );
1204  }
1205  if( j < N ) {
1206  IntrinsicType xmm1;
1207  for( size_t i=0UL; i<M; i+=IT::size ) {
1208  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1209  }
1210  y[j] -= sum( xmm1 );
1211  }
1212  }
1214  //**********************************************************************************************
1215 
1216  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1230  template< typename VT1 // Type of the left-hand side target vector
1231  , typename VT2 // Type of the left-hand side vector operand
1232  , typename MT1 > // Type of the right-hand side matrix operand
1233  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1234  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1235  {
1236  selectDefaultSubAssignKernel( y, x, A );
1237  }
1239  //**********************************************************************************************
1240 
1241  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1242 #if BLAZE_BLAS_MODE
1243 
1256  template< typename VT1 // Type of the left-hand side target vector
1257  , typename VT2 // Type of the left-hand side vector operand
1258  , typename MT1 > // Type of the right-hand side matrix operand
1259  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1260  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1261  {
1262  using boost::numeric_cast;
1263 
1264  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1265  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1266  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1267 
1268  const int M ( numeric_cast<int>( A.rows() ) );
1269  const int N ( numeric_cast<int>( A.columns() ) );
1270  const int lda( numeric_cast<int>( A.spacing() ) );
1271 
1272  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1273  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1274  }
1276 #endif
1277  //**********************************************************************************************
1278 
1279  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1280 #if BLAZE_BLAS_MODE
1281 
1294  template< typename VT1 // Type of the left-hand side target vector
1295  , typename VT2 // Type of the left-hand side vector operand
1296  , typename MT1 > // Type of the right-hand side matrix operand
1297  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1298  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1299  {
1300  using boost::numeric_cast;
1301 
1302  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1303  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1304  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1305 
1306  const int M ( numeric_cast<int>( A.rows() ) );
1307  const int N ( numeric_cast<int>( A.columns() ) );
1308  const int lda( numeric_cast<int>( A.spacing() ) );
1309 
1310  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1311  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1312  }
1314 #endif
1315  //**********************************************************************************************
1316 
1317  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1318 #if BLAZE_BLAS_MODE
1319 
1332  template< typename VT1 // Type of the left-hand side target vector
1333  , typename VT2 // Type of the left-hand side vector operand
1334  , typename MT1 > // Type of the right-hand side matrix operand
1335  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1336  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1337  {
1338  using boost::numeric_cast;
1339 
1340  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1341  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1342  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1343  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1344  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1345  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1346 
1347  const int M ( numeric_cast<int>( A.rows() ) );
1348  const int N ( numeric_cast<int>( A.columns() ) );
1349  const int lda( numeric_cast<int>( A.spacing() ) );
1350  const complex<float> alpha( -1.0F, 0.0F );
1351  const complex<float> beta ( 1.0F, 0.0F );
1352 
1353  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1354  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1355  }
1357 #endif
1358  //**********************************************************************************************
1359 
1360  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1361 #if BLAZE_BLAS_MODE
1362 
1375  template< typename VT1 // Type of the left-hand side target vector
1376  , typename VT2 // Type of the left-hand side vector operand
1377  , typename MT1 > // Type of the right-hand side matrix operand
1378  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1379  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1380  {
1381  using boost::numeric_cast;
1382 
1383  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1384  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1385  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1386  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1387  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1388  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1389 
1390  const int M ( numeric_cast<int>( A.rows() ) );
1391  const int N ( numeric_cast<int>( A.columns() ) );
1392  const int lda( numeric_cast<int>( A.spacing() ) );
1393  const complex<double> alpha( -1.0, 0.0 );
1394  const complex<double> beta ( 1.0, 0.0 );
1395 
1396  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1397  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1398  }
1400 #endif
1401  //**********************************************************************************************
1402 
1403  //**Subtraction assignment to sparse vectors****************************************************
1404  // No special implementation for the subtraction assignment to sparse vectors.
1405  //**********************************************************************************************
1406 
1407  //**Multiplication assignment to dense vectors**************************************************
1420  template< typename VT1 > // Type of the target dense vector
1421  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1422  {
1425  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1426 
1427  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1428 
1429  const ResultType tmp( rhs );
1430  multAssign( ~lhs, tmp );
1431  }
1433  //**********************************************************************************************
1434 
1435  //**Multiplication assignment to sparse vectors*************************************************
1436  // No special implementation for the multiplication assignment to sparse vectors.
1437  //**********************************************************************************************
1438 
1439  //**Compile time checks*************************************************************************
1446  //**********************************************************************************************
1447 };
1448 //*************************************************************************************************
1449 
1450 
1451 
1452 
1453 //=================================================================================================
1454 //
1455 // DVECSCALARMULTEXPR SPECIALIZATION
1456 //
1457 //=================================================================================================
1458 
1459 //*************************************************************************************************
1467 template< typename VT // Type of the left-hand side dense vector
1468  , typename MT // Type of the right-hand side dense matrix
1469  , typename ST > // Type of the right-hand side scalar value
1470 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
1471  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1472  , private Expression
1473  , private Computation
1474 {
1475  private:
1476  //**Type definitions****************************************************************************
1477  typedef TDVecTDMatMultExpr<VT,MT> VMM;  //!< Type of the scaled vector/matrix multiplication expression.
1478  typedef typename VMM::ResultType RES;  //!< Result type of the vector/matrix multiplication expression.
1479  typedef typename VT::ResultType VRT;  //!< Result type of the left-hand side dense vector expression.
1480  typedef typename MT::ResultType MRT;  //!< Result type of the right-hand side dense matrix expression.
1481  typedef typename VRT::ElementType VET;  //!< Element type of the vector result type.
1482  typedef typename MRT::ElementType MET;  //!< Element type of the matrix result type.
1483  typedef typename VT::CompositeType VCT;  //!< Composite type of the left-hand side dense vector expression.
1484  typedef typename MT::CompositeType MCT;  //!< Composite type of the right-hand side dense matrix expression.
1485  //**********************************************************************************************
1486 
1487  //**********************************************************************************************
//! Compilation switch for the evaluation of the right-hand side matrix operand.
/*! The switch is nonzero if the matrix type is a computation, is not vectorizable, has the
    same element type as the vector operand and this element type is BLAS compatible. It is
    used to choose between \a MRT and \a MCT as type of the matrix operand (see \a RT) and as
    part of the kernel selection in the assignment functions. */
1489  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1490  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1491  //**********************************************************************************************
1492 
1493  //**********************************************************************************************
1495 
1498  template< typename T1, typename T2, typename T3, typename T4 >
1499  struct UseSinglePrecisionKernel {
1500  enum { value = IsFloat<typename T1::ElementType>::value &&
1501  IsFloat<typename T2::ElementType>::value &&
1502  IsFloat<typename T3::ElementType>::value &&
1503  !IsComplex<T4>::value };
1504  };
1505  //**********************************************************************************************
1506 
1507  //**********************************************************************************************
1509 
1512  template< typename T1, typename T2, typename T3, typename T4 >
1513  struct UseDoublePrecisionKernel {
1514  enum { value = IsDouble<typename T1::ElementType>::value &&
1515  IsDouble<typename T2::ElementType>::value &&
1516  IsDouble<typename T3::ElementType>::value &&
1517  !IsComplex<T4>::value };
1518  };
1519  //**********************************************************************************************
1520 
1521  //**********************************************************************************************
1523 
1526  template< typename T1, typename T2, typename T3 >
1527  struct UseSinglePrecisionComplexKernel {
1528  typedef complex<float> Type;
1529  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1530  IsSame<typename T2::ElementType,Type>::value &&
1531  IsSame<typename T3::ElementType,Type>::value };
1532  };
1533  //**********************************************************************************************
1534 
1535  //**********************************************************************************************
1537 
1540  template< typename T1, typename T2, typename T3 >
1541  struct UseDoublePrecisionComplexKernel {
1542  typedef complex<double> Type;
1543  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1544  IsSame<typename T2::ElementType,Type>::value &&
1545  IsSame<typename T3::ElementType,Type>::value };
1546  };
1547  //**********************************************************************************************
1548 
1549  //**********************************************************************************************
1551 
1553  template< typename T1, typename T2, typename T3, typename T4 >
1554  struct UseDefaultKernel {
1555  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1556  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1557  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1558  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1559  };
1560  //**********************************************************************************************
1561 
1562  //**********************************************************************************************
1564 
1567  template< typename T1, typename T2, typename T3, typename T4 >
1568  struct UseVectorizedDefaultKernel {
1569  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1570  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1571  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1572  IsSame<typename T1::ElementType,T4>::value &&
1573  IntrinsicTrait<typename T1::ElementType>::addition &&
1574  IntrinsicTrait<typename T1::ElementType>::multiplication };
1575  };
1576  //**********************************************************************************************
1577 
1578  public:
1579  //**Type definitions****************************************************************************
1580  typedef DVecScalarMultExpr<VMM,ST,true> This;  //!< Type of this DVecScalarMultExpr specialization.
1581  typedef typename MultTrait<RES,ST>::Type ResultType;  //!< Result type of the scaled multiplication (MultTrait of RES and ST).
1582  typedef typename ResultType::TransposeType TransposeType;  //!< Transpose type of the result type.
1583  typedef typename ResultType::ElementType ElementType;  //!< Element type of the result type.
1584  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  //!< Intrinsic type associated with the element type.
1585  typedef const ElementType ReturnType;  //!< Return type of the subscript operator.
1586  typedef const ResultType CompositeType;  //!< Composite type: the expression is used via a const result vector.
1587 
1589  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;  //!< Type of the left-hand side operand (the multiplication expression).
1590 
1592  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;  //!< Type of the right-hand side scalar: ElementType if that is numeric, ST otherwise.
1593 
1595  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;  //!< Type for the vector operand: evaluated result if VT is a computation, composite type otherwise.
1596 
1598  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;  //!< Type for the matrix operand: evaluated result if \a evaluate is set, composite type otherwise.
1599  //**********************************************************************************************
1600 
1601  //**Compilation flags***************************************************************************
1603  enum { vectorizable = 0 };  //!< Compilation flag for intrinsic optimization; fixed to 0 for this expression.
1604 
1606  enum { canAlias = CanAlias<VMM>::value };  //!< Compilation flag for aliasing; taken over from the wrapped multiplication expression.
1607  //**********************************************************************************************
1608 
1609  //**Constructor*********************************************************************************
1615  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1616  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1617  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1618  {}
1619  //**********************************************************************************************
1620 
1621  //**Subscript operator**************************************************************************
1627  inline ReturnType operator[]( size_t index ) const {
1628  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1629  return vector_[index] * scalar_;
1630  }
1631  //**********************************************************************************************
1632 
1633  //**Size function*******************************************************************************
1638  inline size_t size() const {
1639  return vector_.size();
1640  }
1641  //**********************************************************************************************
1642 
1643  //**Left operand access*************************************************************************
1648  inline LeftOperand leftOperand() const {
1649  return vector_;
1650  }
1651  //**********************************************************************************************
1652 
1653  //**Right operand access************************************************************************
1658  inline RightOperand rightOperand() const {
1659  return scalar_;
1660  }
1661  //**********************************************************************************************
1662 
1663  //**********************************************************************************************
1669  template< typename T >
1670  inline bool isAliased( const T* alias ) const {
1671  return CanAlias<VMM>::value && vector_.isAliased( alias );
1672  }
1673  //**********************************************************************************************
1674 
1675  private:
1676  //**Member variables****************************************************************************
1677  LeftOperand vector_;  //!< Left-hand side dense vector of the multiplication expression.
1678  RightOperand scalar_;  //!< Right-hand side scalar of the multiplication expression.
1679  //**********************************************************************************************
1680 
1681  //**Assignment to dense vectors*****************************************************************
1693  template< typename VT1 // Type of the target dense vector
1694  , bool TF > // Transpose flag of the target dense vector
1695  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
1696  {
1697  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1698 
1699  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1700  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1701 
1702  if( right.rows() == 0UL ) {
1703  reset( ~lhs );
1704  return;
1705  }
1706  else if( right.columns() == 0UL ) {
1707  return;
1708  }
1709 
1710  LT x( left ); // Evaluation of the left-hand side dense vector operand
1711  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1712 
1713  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1714  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1715  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1716  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1717 
1718  if( ( IsComputation<MT>::value && !evaluate ) ||
1719  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1720  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1721  else
1722  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1723  }
1724  //**********************************************************************************************
1725 
1726  //**Default assignment to dense vectors*********************************************************
1740  template< typename VT1 // Type of the left-hand side target vector
1741  , typename VT2 // Type of the left-hand side vector operand
1742  , typename MT1 // Type of the right-hand side matrix operand
1743  , typename ST2 > // Type of the scalar value
1744  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1745  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1746  {
1747  y.assign( x * A * scalar );
1748  }
1749  //**********************************************************************************************
1750 
1751  //**Vectorized default assignment to dense vectors**********************************************
1765  template< typename VT1 // Type of the left-hand side target vector
1766  , typename VT2 // Type of the left-hand side vector operand
1767  , typename MT1 // Type of the right-hand side matrix operand
1768  , typename ST2 > // Type of the scalar value
1769  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1770  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1771  {
1772  typedef IntrinsicTrait<ElementType> IT;
1773 
1774  const size_t M( A.rows() );
1775  const size_t N( A.columns() );
1776 
1777  size_t j( 0UL );
1778 
1779  for( ; (j+8UL) <= N; j+=8UL ) {
1780  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1781  for( size_t i=0UL; i<M; i+=IT::size ) {
1782  const IntrinsicType x1( x.get(i) );
1783  xmm1 = xmm1 + x1 * A.get(i,j );
1784  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1785  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1786  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1787  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1788  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1789  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1790  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1791  }
1792  y[j ] = sum( xmm1 ) * scalar;
1793  y[j+1UL] = sum( xmm2 ) * scalar;
1794  y[j+2UL] = sum( xmm3 ) * scalar;
1795  y[j+3UL] = sum( xmm4 ) * scalar;
1796  y[j+4UL] = sum( xmm5 ) * scalar;
1797  y[j+5UL] = sum( xmm6 ) * scalar;
1798  y[j+6UL] = sum( xmm7 ) * scalar;
1799  y[j+7UL] = sum( xmm8 ) * scalar;
1800  }
1801  for( ; (j+4UL) <= N; j+=4UL ) {
1802  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1803  for( size_t i=0UL; i<M; i+=IT::size ) {
1804  const IntrinsicType x1( x.get(i) );
1805  xmm1 = xmm1 + x1 * A.get(i,j );
1806  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1807  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1808  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1809  }
1810  y[j ] = sum( xmm1 ) * scalar;
1811  y[j+1UL] = sum( xmm2 ) * scalar;
1812  y[j+2UL] = sum( xmm3 ) * scalar;
1813  y[j+3UL] = sum( xmm4 ) * scalar;
1814  }
1815  for( ; (j+3UL) <= N; j+=3UL ) {
1816  IntrinsicType xmm1, xmm2, xmm3;
1817  for( size_t i=0UL; i<M; i+=IT::size ) {
1818  const IntrinsicType x1( x.get(i) );
1819  xmm1 = xmm1 + x1 * A.get(i,j );
1820  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1821  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1822  }
1823  y[j ] = sum( xmm1 ) * scalar;
1824  y[j+1UL] = sum( xmm2 ) * scalar;
1825  y[j+2UL] = sum( xmm3 ) * scalar;
1826  }
1827  for( ; (j+2UL) <= N; j+=2UL ) {
1828  IntrinsicType xmm1, xmm2;
1829  for( size_t i=0UL; i<M; i+=IT::size ) {
1830  const IntrinsicType x1( x.get(i) );
1831  xmm1 = xmm1 + x1 * A.get(i,j );
1832  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1833  }
1834  y[j ] = sum( xmm1 ) * scalar;
1835  y[j+1UL] = sum( xmm2 ) * scalar;
1836  }
1837  if( j < N ) {
1838  IntrinsicType xmm1;
1839  for( size_t i=0UL; i<M; i+=IT::size ) {
1840  xmm1 = xmm1 + A.get(i,j) * x.get(i);
1841  }
1842  y[j] = sum( xmm1 ) * scalar;
1843  }
1844  }
1845  //**********************************************************************************************
1846 
1847  //**BLAS-based assignment to dense vectors (default)********************************************
1860  template< typename VT1 // Type of the left-hand side target vector
1861  , typename VT2 // Type of the left-hand side vector operand
1862  , typename MT1 // Type of the right-hand side matrix operand
1863  , typename ST2 > // Type of the scalar value
1864  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1865  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1866  {
1867  selectDefaultAssignKernel( y, x, A, scalar );
1868  }
1869  //**********************************************************************************************
1870 
1871  //**BLAS-based assignment to dense vectors (single precision)***********************************
1872 #if BLAZE_BLAS_MODE
1873 
1886  template< typename VT1 // Type of the left-hand side target vector
1887  , typename VT2 // Type of the left-hand side vector operand
1888  , typename MT1 // Type of the right-hand side matrix operand
1889  , typename ST2 > // Type of the scalar value
1890  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1891  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1892  {
1893  using boost::numeric_cast;
1894 
1895  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1896  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1897  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1898 
1899  const int M ( numeric_cast<int>( A.rows() ) );
1900  const int N ( numeric_cast<int>( A.columns() ) );
1901  const int lda( numeric_cast<int>( A.spacing() ) );
1902 
1903  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
1904  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1905  }
1906 #endif
1907  //**********************************************************************************************
1908 
1909  //**BLAS-based assignment to dense vectors (double precision)***********************************
1910 #if BLAZE_BLAS_MODE
1911 
1924  template< typename VT1 // Type of the left-hand side target vector
1925  , typename VT2 // Type of the left-hand side vector operand
1926  , typename MT1 // Type of the right-hand side matrix operand
1927  , typename ST2 > // Type of the scalar value
1928  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1929  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1930  {
1931  using boost::numeric_cast;
1932 
1933  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1934  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1935  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1936 
1937  const int M ( numeric_cast<int>( A.rows() ) );
1938  const int N ( numeric_cast<int>( A.columns() ) );
1939  const int lda( numeric_cast<int>( A.spacing() ) );
1940 
1941  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
1942  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
1943  }
1944 #endif
1945  //**********************************************************************************************
1946 
1947  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1948 #if BLAZE_BLAS_MODE
1949 
1963  template< typename VT1 // Type of the left-hand side target vector
1964  , typename VT2 // Type of the left-hand side vector operand
1965  , typename MT1 // Type of the right-hand side matrix operand
1966  , typename ST2 > // Type of the scalar value
1967  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1968  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1969  {
1970  using boost::numeric_cast;
1971 
1972  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1973  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1974  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1976  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1977  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1978  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1979 
1980  const int M ( numeric_cast<int>( A.rows() ) );
1981  const int N ( numeric_cast<int>( A.columns() ) );
1982  const int lda( numeric_cast<int>( A.spacing() ) );
1983  const complex<float> alpha( scalar );
1984  const complex<float> beta ( 0.0F, 0.0F );
1985 
1986  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1987  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1988  }
1989 #endif
1990  //**********************************************************************************************
1991 
1992  //**BLAS-based assignment to dense vectors (double precision complex)***************************
1993 #if BLAZE_BLAS_MODE
1994 
// BLAS-based assignment kernel for double precision complex operands.
//
// Overload enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds.
// Issues a single cblas_zgemv call on A (column-major layout, transposed operation) with
// alpha = scalar and beta = 0, so the result is stored to y rather than accumulated onto it.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; converted to complex<double> for the BLAS call.
//
// NOTE(review): this listing skips source line 2020 inside the body - confirm against the
// original header that no statement was lost.
2008  template< typename VT1 // Type of the left-hand side target vector
2009  , typename VT2 // Type of the left-hand side vector operand
2010  , typename MT1 // Type of the right-hand side matrix operand
2011  , typename ST2 > // Type of the scalar value
2012  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2013  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2014  {
2015  using boost::numeric_cast;
2016 
      // Compile time checks: all element types must be complex with double components.
2017  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2018  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2019  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2021  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2022  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2023  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2024 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2025  const int M ( numeric_cast<int>( A.rows() ) );
2026  const int N ( numeric_cast<int>( A.columns() ) );
2027  const int lda( numeric_cast<int>( A.spacing() ) );
2028  const complex<double> alpha( scalar );
2029  const complex<double> beta ( 0.0, 0.0 );
2030 
      // The complex scaling factors are handed to cblas_zgemv by address.
2031  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2032  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2033  }
2034 #endif
2035  //**********************************************************************************************
2036 
2037  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector/matrix multiplication expression to a sparse vector.
//
// The expression is first evaluated into a temporary of type ResultType; that temporary is
// then assigned to the target through assign().
//
// \param lhs The target sparse vector.
// \param rhs The scaled multiplication expression to be assigned.
//
// NOTE(review): this listing skips source lines 2053-2054 inside the body - confirm against
// the original header that no statement was lost.
2049  template< typename VT1 // Type of the target sparse vector
2050  , bool TF > // Transpose flag of the target sparse vector
2051  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2052  {
2055  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2056 
2057  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2058 
      // Evaluate the whole expression once, then hand the plain result to assign().
2059  const ResultType tmp( rhs );
2060  assign( ~lhs, tmp );
2061  }
2062  //**********************************************************************************************
2063 
2064  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector/matrix multiplication expression to a dense vector.
//
// Fetches the two operands of rhs.vector_, returns early when the matrix operand has no rows
// or no columns, evaluates both operands into objects of type LT and RT and then dispatches
// to one of the add-assign kernels with rhs.scalar_ as scaling factor.
//
// \param lhs The target dense vector.
// \param rhs The scaled multiplication expression to be added.
2076  template< typename VT1 // Type of the target dense vector
2077  , bool TF > // Transpose flag of the target dense vector
2078  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2079  {
2080  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2081 
2082  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2083  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2084 
      // Empty matrix operand: leave the target untouched.
2085  if( right.rows() == 0UL || right.columns() == 0UL ) {
2086  return;
2087  }
2088 
2089  LT x( left ); // Evaluation of the left-hand side dense vector operand
2090  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2091 
2092  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2093  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2094  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2095  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2096 
      // Default kernel if MT is a computation while 'evaluate' is false, or if the number of
      // matrix elements is below TDVECTDMATMULT_THRESHOLD; BLAS kernel selection otherwise.
2097  if( ( IsComputation<MT>::value && !evaluate ) ||
2098  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2099  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2100  else
2101  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2102  }
2103  //**********************************************************************************************
2104 
2105  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel.
//
// Overload active (via DisableIf) when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not
// hold. Forwards the expression x * A * scalar to the addAssign() member of the target.
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
2119  template< typename VT1 // Type of the left-hand side target vector
2120  , typename VT2 // Type of the left-hand side vector operand
2121  , typename MT1 // Type of the right-hand side matrix operand
2122  , typename ST2 > // Type of the scalar value
2123  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2124  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2125  {
2126  y.addAssign( x * A * scalar );
2127  }
2128  //**********************************************************************************************
2129 
2130  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel.
//
// Overload enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
// The columns of A are processed in groups of 8, then 4, 3 and 2, and finally a single
// remaining column. Within a group, one IntrinsicType accumulator per column collects the
// products x.get(i) * A.get(i,j+k) for i = 0, IT::size, 2*IT::size, ... while i < M. The
// horizontal sum() of each accumulator, multiplied by scalar, is then added to y[j+k].
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel presumably
// relies on IntrinsicType default-constructing to zero - confirm.
// NOTE(review): the i loops step by IT::size and have no remainder loop; presumably x and the
// columns of A are padded to a multiple of IT::size - confirm.
2144  template< typename VT1 // Type of the left-hand side target vector
2145  , typename VT2 // Type of the left-hand side vector operand
2146  , typename MT1 // Type of the right-hand side matrix operand
2147  , typename ST2 > // Type of the scalar value
2148  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2149  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2150  {
2151  typedef IntrinsicTrait<ElementType> IT;
2152 
2153  const size_t M( A.rows() );
2154  const size_t N( A.columns() );
2155 
      // Column index; shared by all loops below so each one continues where the previous stopped.
2156  size_t j( 0UL );
2157 
      // Groups of eight columns
2158  for( ; (j+8UL) <= N; j+=8UL ) {
2159  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2160  for( size_t i=0UL; i<M; i+=IT::size ) {
2161  const IntrinsicType x1( x.get(i) );
2162  xmm1 = xmm1 + x1 * A.get(i,j );
2163  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2164  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2165  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2166  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2167  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2168  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2169  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2170  }
2171  y[j ] += sum( xmm1 ) * scalar;
2172  y[j+1UL] += sum( xmm2 ) * scalar;
2173  y[j+2UL] += sum( xmm3 ) * scalar;
2174  y[j+3UL] += sum( xmm4 ) * scalar;
2175  y[j+4UL] += sum( xmm5 ) * scalar;
2176  y[j+5UL] += sum( xmm6 ) * scalar;
2177  y[j+6UL] += sum( xmm7 ) * scalar;
2178  y[j+7UL] += sum( xmm8 ) * scalar;
2179  }
      // Groups of four columns
2180  for( ; (j+4UL) <= N; j+=4UL ) {
2181  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2182  for( size_t i=0UL; i<M; i+=IT::size ) {
2183  const IntrinsicType x1( x.get(i) );
2184  xmm1 = xmm1 + x1 * A.get(i,j );
2185  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2186  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2187  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2188  }
2189  y[j ] += sum( xmm1 ) * scalar;
2190  y[j+1UL] += sum( xmm2 ) * scalar;
2191  y[j+2UL] += sum( xmm3 ) * scalar;
2192  y[j+3UL] += sum( xmm4 ) * scalar;
2193  }
      // Groups of three columns
2194  for( ; (j+3UL) <= N; j+=3UL ) {
2195  IntrinsicType xmm1, xmm2, xmm3;
2196  for( size_t i=0UL; i<M; i+=IT::size ) {
2197  const IntrinsicType x1( x.get(i) );
2198  xmm1 = xmm1 + x1 * A.get(i,j );
2199  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2200  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2201  }
2202  y[j ] += sum( xmm1 ) * scalar;
2203  y[j+1UL] += sum( xmm2 ) * scalar;
2204  y[j+2UL] += sum( xmm3 ) * scalar;
2205  }
      // Groups of two columns
2206  for( ; (j+2UL) <= N; j+=2UL ) {
2207  IntrinsicType xmm1, xmm2;
2208  for( size_t i=0UL; i<M; i+=IT::size ) {
2209  const IntrinsicType x1( x.get(i) );
2210  xmm1 = xmm1 + x1 * A.get(i,j );
2211  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2212  }
2213  y[j ] += sum( xmm1 ) * scalar;
2214  y[j+1UL] += sum( xmm2 ) * scalar;
2215  }
      // At most one column is left at this point
2216  if( j < N ) {
2217  IntrinsicType xmm1;
2218  for( size_t i=0UL; i<M; i+=IT::size ) {
2219  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2220  }
2221  y[j] += sum( xmm1 ) * scalar;
2222  }
2223  }
2224  //**********************************************************************************************
2225 
2226  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback for the BLAS-based addition assignment.
//
// Overload enabled (via EnableIf) when UseDefaultKernel<VT1,VT2,MT1,ST2> holds; it performs
// no BLAS call and simply forwards to selectDefaultAddAssignKernel().
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
2240  template< typename VT1 // Type of the left-hand side target vector
2241  , typename VT2 // Type of the left-hand side vector operand
2242  , typename MT1 // Type of the right-hand side matrix operand
2243  , typename ST2 > // Type of the scalar value
2244  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2245  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2246  {
2247  selectDefaultAddAssignKernel( y, x, A, scalar );
2248  }
2249  //**********************************************************************************************
2250 
2251  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2252 #if BLAZE_BLAS_MODE
2253 
// BLAS-based addition assignment kernel for single precision operands.
//
// Overload enabled (via EnableIf) when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// Issues a single cblas_sgemv call on A (column-major layout, transposed operation) with
// alpha = scalar and beta = 1, so the scaled product is accumulated onto the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; passed as alpha.
2266  template< typename VT1 // Type of the left-hand side target vector
2267  , typename VT2 // Type of the left-hand side vector operand
2268  , typename MT1 // Type of the right-hand side matrix operand
2269  , typename ST2 > // Type of the scalar value
2270  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2271  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2272  {
2273  using boost::numeric_cast;
2274 
2275  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2276  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2277  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2278 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2279  const int M ( numeric_cast<int>( A.rows() ) );
2280  const int N ( numeric_cast<int>( A.columns() ) );
2281  const int lda( numeric_cast<int>( A.spacing() ) );
2282 
2283  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2284  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2285  }
2286 #endif
2287  //**********************************************************************************************
2288 
2289  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2290 #if BLAZE_BLAS_MODE
2291 
// BLAS-based addition assignment kernel for double precision operands.
//
// Overload enabled (via EnableIf) when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// Issues a single cblas_dgemv call on A (column-major layout, transposed operation) with
// alpha = scalar and beta = 1, so the scaled product is accumulated onto the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; passed as alpha.
2304  template< typename VT1 // Type of the left-hand side target vector
2305  , typename VT2 // Type of the left-hand side vector operand
2306  , typename MT1 // Type of the right-hand side matrix operand
2307  , typename ST2 > // Type of the scalar value
2308  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2309  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2310  {
2311  using boost::numeric_cast;
2312 
2313  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2314  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2315  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2316 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2317  const int M ( numeric_cast<int>( A.rows() ) );
2318  const int N ( numeric_cast<int>( A.columns() ) );
2319  const int lda( numeric_cast<int>( A.spacing() ) );
2320 
2321  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2322  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2323  }
2324 #endif
2325  //**********************************************************************************************
2326 
2327  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2328 #if BLAZE_BLAS_MODE
2329 
// BLAS-based addition assignment kernel for single precision complex operands.
//
// Overload enabled (via EnableIf) when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds.
// Issues a single cblas_cgemv call on A (column-major layout, transposed operation) with
// alpha = scalar and beta = (1,0), so the scaled product is accumulated onto the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; converted to complex<float> for the BLAS call.
//
// NOTE(review): this listing skips source line 2355 inside the body - confirm against the
// original header that no statement was lost.
2343  template< typename VT1 // Type of the left-hand side target vector
2344  , typename VT2 // Type of the left-hand side vector operand
2345  , typename MT1 // Type of the right-hand side matrix operand
2346  , typename ST2 > // Type of the scalar value
2347  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2348  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2349  {
2350  using boost::numeric_cast;
2351 
      // Compile time checks: all element types must be complex with float components.
2352  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2353  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2354  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2356  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2357  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2358  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2359 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2360  const int M ( numeric_cast<int>( A.rows() ) );
2361  const int N ( numeric_cast<int>( A.columns() ) );
2362  const int lda( numeric_cast<int>( A.spacing() ) );
2363  const complex<float> alpha( scalar );
2364  const complex<float> beta ( 1.0F, 0.0F );
2365 
      // The complex scaling factors are handed to cblas_cgemv by address.
2366  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2367  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2368  }
2369 #endif
2370  //**********************************************************************************************
2371 
2372  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2373 #if BLAZE_BLAS_MODE
2374 
// BLAS-based addition assignment kernel for double precision complex operands.
//
// Overload enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds.
// Issues a single cblas_zgemv call on A (column-major layout, transposed operation) with
// alpha = scalar and beta = (1,0), so the scaled product is accumulated onto the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; converted to complex<double> for the BLAS call.
//
// NOTE(review): this listing skips source line 2400 inside the body - confirm against the
// original header that no statement was lost.
2388  template< typename VT1 // Type of the left-hand side target vector
2389  , typename VT2 // Type of the left-hand side vector operand
2390  , typename MT1 // Type of the right-hand side matrix operand
2391  , typename ST2 > // Type of the scalar value
2392  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2393  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2394  {
2395  using boost::numeric_cast;
2396 
      // Compile time checks: all element types must be complex with double components.
2397  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2398  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2399  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2401  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2402  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2403  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2404 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2405  const int M ( numeric_cast<int>( A.rows() ) );
2406  const int N ( numeric_cast<int>( A.columns() ) );
2407  const int lda( numeric_cast<int>( A.spacing() ) );
2408  const complex<double> alpha( scalar );
2409  const complex<double> beta ( 1.0, 0.0 );
2410 
      // The complex scaling factors are handed to cblas_zgemv by address.
2411  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2412  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2413  }
2414 #endif
2415  //**********************************************************************************************
2416 
2417  //**Addition assignment to sparse vectors*******************************************************
2418  // No special implementation for the addition assignment to sparse vectors.
2419  //**********************************************************************************************
2420 
2421  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled vector/matrix multiplication expression to a dense vector.
//
// Fetches the two operands of rhs.vector_, returns early when the matrix operand has no rows
// or no columns, evaluates both operands into objects of type LT and RT and then dispatches
// to one of the sub-assign kernels with rhs.scalar_ as scaling factor.
//
// \param lhs The target dense vector.
// \param rhs The scaled multiplication expression to be subtracted.
2433  template< typename VT1 // Type of the target dense vector
2434  , bool TF > // Transpose flag of the target dense vector
2435  friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2436  {
2437  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2438 
2439  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2440  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2441 
      // Empty matrix operand: leave the target untouched.
2442  if( right.rows() == 0UL || right.columns() == 0UL ) {
2443  return;
2444  }
2445 
2446  LT x( left ); // Evaluation of the left-hand side dense vector operand
2447  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2448 
2449  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2450  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2451  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2452  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2453 
      // Default kernel if MT is a computation while 'evaluate' is false, or if the number of
      // matrix elements is below TDVECTDMATMULT_THRESHOLD; BLAS kernel selection otherwise.
2454  if( ( IsComputation<MT>::value && !evaluate ) ||
2455  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2456  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2457  else
2458  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2459  }
2460  //**********************************************************************************************
2461 
2462  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel.
//
// Overload active (via DisableIf) when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not
// hold. Forwards the expression x * A * scalar to the subAssign() member of the target.
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
2476  template< typename VT1 // Type of the left-hand side target vector
2477  , typename VT2 // Type of the left-hand side vector operand
2478  , typename MT1 // Type of the right-hand side matrix operand
2479  , typename ST2 > // Type of the scalar value
2480  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2481  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2482  {
2483  y.subAssign( x * A * scalar );
2484  }
2485  //**********************************************************************************************
2486 
2487  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel.
//
// Overload enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
// The columns of A are processed in groups of 8, then 4, 3 and 2, and finally a single
// remaining column. Within a group, one IntrinsicType accumulator per column collects the
// products x.get(i) * A.get(i,j+k) for i = 0, IT::size, 2*IT::size, ... while i < M. The
// horizontal sum() of each accumulator, multiplied by scalar, is then subtracted from y[j+k].
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel presumably
// relies on IntrinsicType default-constructing to zero - confirm.
// NOTE(review): the i loops step by IT::size and have no remainder loop; presumably x and the
// columns of A are padded to a multiple of IT::size - confirm.
2501  template< typename VT1 // Type of the left-hand side target vector
2502  , typename VT2 // Type of the left-hand side vector operand
2503  , typename MT1 // Type of the right-hand side matrix operand
2504  , typename ST2 > // Type of the scalar value
2505  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2506  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2507  {
2508  typedef IntrinsicTrait<ElementType> IT;
2509 
2510  const size_t M( A.rows() );
2511  const size_t N( A.columns() );
2512 
      // Column index; shared by all loops below so each one continues where the previous stopped.
2513  size_t j( 0UL );
2514 
      // Groups of eight columns
2515  for( ; (j+8UL) <= N; j+=8UL ) {
2516  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2517  for( size_t i=0UL; i<M; i+=IT::size ) {
2518  const IntrinsicType x1( x.get(i) );
2519  xmm1 = xmm1 + x1 * A.get(i,j );
2520  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2521  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2522  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2523  xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2524  xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2525  xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2526  xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2527  }
2528  y[j ] -= sum( xmm1 ) * scalar;
2529  y[j+1UL] -= sum( xmm2 ) * scalar;
2530  y[j+2UL] -= sum( xmm3 ) * scalar;
2531  y[j+3UL] -= sum( xmm4 ) * scalar;
2532  y[j+4UL] -= sum( xmm5 ) * scalar;
2533  y[j+5UL] -= sum( xmm6 ) * scalar;
2534  y[j+6UL] -= sum( xmm7 ) * scalar;
2535  y[j+7UL] -= sum( xmm8 ) * scalar;
2536  }
      // Groups of four columns
2537  for( ; (j+4UL) <= N; j+=4UL ) {
2538  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2539  for( size_t i=0UL; i<M; i+=IT::size ) {
2540  const IntrinsicType x1( x.get(i) );
2541  xmm1 = xmm1 + x1 * A.get(i,j );
2542  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2543  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2544  xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2545  }
2546  y[j ] -= sum( xmm1 ) * scalar;
2547  y[j+1UL] -= sum( xmm2 ) * scalar;
2548  y[j+2UL] -= sum( xmm3 ) * scalar;
2549  y[j+3UL] -= sum( xmm4 ) * scalar;
2550  }
      // Groups of three columns
2551  for( ; (j+3UL) <= N; j+=3UL ) {
2552  IntrinsicType xmm1, xmm2, xmm3;
2553  for( size_t i=0UL; i<M; i+=IT::size ) {
2554  const IntrinsicType x1( x.get(i) );
2555  xmm1 = xmm1 + x1 * A.get(i,j );
2556  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2557  xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2558  }
2559  y[j ] -= sum( xmm1 ) * scalar;
2560  y[j+1UL] -= sum( xmm2 ) * scalar;
2561  y[j+2UL] -= sum( xmm3 ) * scalar;
2562  }
      // Groups of two columns
2563  for( ; (j+2UL) <= N; j+=2UL ) {
2564  IntrinsicType xmm1, xmm2;
2565  for( size_t i=0UL; i<M; i+=IT::size ) {
2566  const IntrinsicType x1( x.get(i) );
2567  xmm1 = xmm1 + x1 * A.get(i,j );
2568  xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2569  }
2570  y[j ] -= sum( xmm1 ) * scalar;
2571  y[j+1UL] -= sum( xmm2 ) * scalar;
2572  }
      // At most one column is left at this point
2573  if( j < N ) {
2574  IntrinsicType xmm1;
2575  for( size_t i=0UL; i<M; i+=IT::size ) {
2576  xmm1 = xmm1 + A.get(i,j) * x.get(i);
2577  }
2578  y[j] -= sum( xmm1 ) * scalar;
2579  }
2580  }
2581  //**********************************************************************************************
2582 
2583  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback for the BLAS-based subtraction assignment.
//
// Overload enabled (via EnableIf) when UseDefaultKernel<VT1,VT2,MT1,ST2> holds; it performs
// no BLAS call and simply forwards to selectDefaultSubAssignKernel().
//
// \param y The target vector.
// \param x The vector operand.
// \param A The matrix operand.
// \param scalar The scaling factor.
2598  template< typename VT1 // Type of the left-hand side target vector
2599  , typename VT2 // Type of the left-hand side vector operand
2600  , typename MT1 // Type of the right-hand side matrix operand
2601  , typename ST2 > // Type of the scalar value
2602  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2603  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2604  {
2605  selectDefaultSubAssignKernel( y, x, A, scalar );
2606  }
2607  //**********************************************************************************************
2608 
2609  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2610 #if BLAZE_BLAS_MODE
2611 
// BLAS-based subtraction assignment kernel for single precision operands.
//
// Overload enabled (via EnableIf) when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// Issues a single cblas_sgemv call on A (column-major layout, transposed operation) with
// alpha = -scalar and beta = 1, so the scaled product is subtracted from the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; its negation is passed as alpha.
2624  template< typename VT1 // Type of the left-hand side target vector
2625  , typename VT2 // Type of the left-hand side vector operand
2626  , typename MT1 // Type of the right-hand side matrix operand
2627  , typename ST2 > // Type of the scalar value
2628  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2629  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2630  {
2631  using boost::numeric_cast;
2632 
2633  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2634  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2635  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2636 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2637  const int M ( numeric_cast<int>( A.rows() ) );
2638  const int N ( numeric_cast<int>( A.columns() ) );
2639  const int lda( numeric_cast<int>( A.spacing() ) );
2640 
      // Subtraction is expressed as an accumulation with negated scaling factor.
2641  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2642  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2643  }
2644 #endif
2645  //**********************************************************************************************
2646 
2647  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2648 #if BLAZE_BLAS_MODE
2649 
// BLAS-based subtraction assignment kernel for double precision operands.
//
// Overload enabled (via EnableIf) when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// Issues a single cblas_dgemv call on A (column-major layout, transposed operation) with
// alpha = -scalar and beta = 1, so the scaled product is subtracted from the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; its negation is passed as alpha.
2662  template< typename VT1 // Type of the left-hand side target vector
2663  , typename VT2 // Type of the left-hand side vector operand
2664  , typename MT1 // Type of the right-hand side matrix operand
2665  , typename ST2 > // Type of the scalar value
2666  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2667  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2668  {
2669  using boost::numeric_cast;
2670 
2671  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2672  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2674 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2675  const int M ( numeric_cast<int>( A.rows() ) );
2676  const int N ( numeric_cast<int>( A.columns() ) );
2677  const int lda( numeric_cast<int>( A.spacing() ) );
2678 
      // Subtraction is expressed as an accumulation with negated scaling factor.
2679  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2680  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2681  }
2682 #endif
2683  //**********************************************************************************************
2684 
2685  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2686 #if BLAZE_BLAS_MODE
2687 
// BLAS-based subtraction assignment kernel for single precision complex operands.
//
// Overload enabled (via EnableIf) when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds.
// Issues a single cblas_cgemv call on A (column-major layout, transposed operation) with
// alpha = -scalar and beta = (1,0), so the scaled product is subtracted from the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; its negation is converted to complex<float>.
//
// NOTE(review): this listing skips source line 2714 inside the body - confirm against the
// original header that no statement was lost.
2702  template< typename VT1 // Type of the left-hand side target vector
2703  , typename VT2 // Type of the left-hand side vector operand
2704  , typename MT1 // Type of the right-hand side matrix operand
2705  , typename ST2 > // Type of the scalar value
2706  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2707  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2708  {
2709  using boost::numeric_cast;
2710 
      // Compile time checks: all element types must be complex with float components.
2711  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2712  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2713  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2715  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2716  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2717  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2718 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2719  const int M ( numeric_cast<int>( A.rows() ) );
2720  const int N ( numeric_cast<int>( A.columns() ) );
2721  const int lda( numeric_cast<int>( A.spacing() ) );
      // Subtraction is expressed as an accumulation with negated scaling factor.
2722  const complex<float> alpha( -scalar );
2723  const complex<float> beta ( 1.0F, 0.0F );
2724 
      // The complex scaling factors are handed to cblas_cgemv by address.
2725  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2726  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2727  }
2728 #endif
2729  //**********************************************************************************************
2730 
2731  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2732 #if BLAZE_BLAS_MODE
2733 
// BLAS-based subtraction assignment kernel for double precision complex operands.
//
// Overload enabled (via EnableIf) when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds.
// Issues a single cblas_zgemv call on A (column-major layout, transposed operation) with
// alpha = -scalar and beta = (1,0), so the scaled product is subtracted from the contents of y.
//
// \param y The target vector; accessed through y.data().
// \param x The vector operand; accessed through x.data().
// \param A The matrix operand; accessed through A.data() with leading dimension A.spacing().
// \param scalar The scaling factor; its negation is converted to complex<double>.
//
// NOTE(review): this listing skips source line 2760 inside the body - confirm against the
// original header that no statement was lost.
2748  template< typename VT1 // Type of the left-hand side target vector
2749  , typename VT2 // Type of the left-hand side vector operand
2750  , typename MT1 // Type of the right-hand side matrix operand
2751  , typename ST2 > // Type of the scalar value
2752  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2753  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2754  {
2755  using boost::numeric_cast;
2756 
      // Compile time checks: all element types must be complex with double components.
2757  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2758  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2759  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2762  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2763  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2764 
      // CBLAS expects int arguments; numeric_cast performs a range-checked conversion.
2765  const int M ( numeric_cast<int>( A.rows() ) );
2766  const int N ( numeric_cast<int>( A.columns() ) );
2767  const int lda( numeric_cast<int>( A.spacing() ) );
      // Subtraction is expressed as an accumulation with negated scaling factor.
2768  const complex<double> alpha( -scalar );
2769  const complex<double> beta ( 1.0, 0.0 );
2770 
      // The complex scaling factors are handed to cblas_zgemv by address.
2771  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2772  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2773  }
2774 #endif
2775  //**********************************************************************************************
2776 
2777  //**Subtraction assignment to sparse vectors****************************************************
2778  // No special implementation for the subtraction assignment to sparse vectors.
2779  //**********************************************************************************************
2780 
2781  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled vector/matrix multiplication expression to a dense vector.
//
// The expression is first evaluated into a temporary of type ResultType; that temporary is
// then passed to multAssign() together with the target.
//
// \param lhs The target dense vector.
// \param rhs The scaled multiplication expression to be multiplied.
//
// NOTE(review): this listing skips source lines 2797-2798 inside the body - confirm against
// the original header that no statement was lost.
2793  template< typename VT1 // Type of the target dense vector
2794  , bool TF > // Transpose flag of the target dense vector
2795  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2796  {
2799  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2800 
2801  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2802 
      // Evaluate the whole expression once, then hand the plain result to multAssign().
2803  const ResultType tmp( rhs );
2804  multAssign( ~lhs, tmp );
2805  }
2806  //**********************************************************************************************
2807 
2808  //**Multiplication assignment to sparse vectors*******************************************************
2809  // No special implementation for the multiplication assignment to sparse vectors.
2810  //**********************************************************************************************
2811 
2812  //**Compile time checks*************************************************************************
2820  //**********************************************************************************************
2821 };
2823 //*************************************************************************************************
2824 
2825 
2826 
2827 
2828 //=================================================================================================
2829 //
2830 // GLOBAL BINARY ARITHMETIC OPERATORS
2831 //
2832 //=================================================================================================
2833 
2834 //*************************************************************************************************
// Multiplication operator for a transpose dense vector and a dense matrix; creates the
// TDVecTDMatMultExpr expression object for the two operands.
//
// The overload is disabled (via DisableIf) when T2 is a matrix/matrix multiplication
// expression (IsMatMatMultExpr<T2>).
//
// \param vec The left-hand side dense vector.
// \param mat The right-hand side dense matrix.
// \return The expression object TDVecTDMatMultExpr<T1,T2> built from both operands.
// \exception std::invalid_argument The vector size differs from the number of matrix rows.
//
// NOTE(review): the declarator on source line 2868 was absent from this listing and has been
// restored from the parameter names used in the body and the operand kinds implied by the
// class name (transpose vector, storage-order flag 'true' for the matrix) - confirm against
// the original header.
2865 template< typename T1 // Type of the left-hand side dense vector
2866  , typename T2 > // Type of the right-hand side dense matrix
2867 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
2868  operator*( const DenseVector<T1,true>& vec, const DenseMatrix<T2,true>& mat )
2869 {
      // The vector size has to match the number of rows of the matrix.
2870  if( (~vec).size() != (~mat).rows() )
2871  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2872 
2873  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
2874 }
2875 //*************************************************************************************************
2876 
2877 } // namespace blaze
2878 
2879 #endif