All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
49 #include <blaze/system/BLAS.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/Complex.h>
57 #include <blaze/util/DisableIf.h>
58 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/SelectType.h>
61 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // CLASS DMATDVECMULTEXPR
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
84 template< typename MT // Type of the left-hand side dense matrix
85  , typename VT > // Type of the right-hand side dense vector
86 class DMatDVecMultExpr : public DenseVector< DMatDVecMultExpr<MT,VT>, false >
87  , private Expression
88  , private Computation
89 {
90  private:
91  //**Type definitions****************************************************************************
// Private shorthand aliases derived from the two operand types MT (matrix) and VT (vector).
92  typedef typename MT::ResultType MRT; // Result type of the left-hand side matrix type
93  typedef typename VT::ResultType VRT; // Result type of the right-hand side vector type
94  typedef typename MRT::ElementType MET; // Element type of MRT
95  typedef typename VRT::ElementType VET; // Element type of VRT
96  typedef typename MT::CompositeType MCT; // Composite type of the left-hand side matrix type
97  typedef typename VT::CompositeType VCT; // Composite type of the right-hand side vector type
98  //**********************************************************************************************
99 
100  //**********************************************************************************************
102  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
104  //**********************************************************************************************
105 
106  //**********************************************************************************************
108 
109 
112  template< typename T1, typename T2, typename T3 >
113  struct UseSinglePrecisionKernel {
114  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
118  };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124 
125 
128  template< typename T1, typename T2, typename T3 >
129  struct UseDoublePrecisionKernel {
130  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
131  IsDouble<typename T1::ElementType>::value &&
132  IsDouble<typename T2::ElementType>::value &&
133  IsDouble<typename T3::ElementType>::value };
134  };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
141 
144  template< typename T1, typename T2, typename T3 >
145  struct UseSinglePrecisionComplexKernel {
146  typedef complex<float> Type;
147  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
148  IsSame<typename T1::ElementType,Type>::value &&
149  IsSame<typename T2::ElementType,Type>::value &&
150  IsSame<typename T3::ElementType,Type>::value };
151  };
153  //**********************************************************************************************
154 
155  //**********************************************************************************************
157 
158 
161  template< typename T1, typename T2, typename T3 >
162  struct UseDoublePrecisionComplexKernel {
163  typedef complex<double> Type;
164  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
165  IsSame<typename T1::ElementType,Type>::value &&
166  IsSame<typename T2::ElementType,Type>::value &&
167  IsSame<typename T3::ElementType,Type>::value };
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
175 
177  template< typename T1, typename T2, typename T3 >
178  struct UseDefaultKernel {
179  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
180  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
181  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
182  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
183  };
185  //**********************************************************************************************
186 
187  //**********************************************************************************************
189 
190 
193  template< typename T1, typename T2, typename T3 >
194  struct UseVectorizedDefaultKernel {
195  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
196  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
197  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
198  IntrinsicTrait<typename T1::ElementType>::addition &&
199  IntrinsicTrait<typename T1::ElementType>::multiplication };
200  };
202  //**********************************************************************************************
203 
204  public:
205  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ResultType is used below, but its typedef is not part of this listing
// (listing lines 206-207 are missing) -- confirm against the original header.
208  typedef typename ResultType::TransposeType TransposeType; // Transpose type of ResultType
209  typedef typename ResultType::ElementType ElementType; // Element type of ResultType
211  typedef const ElementType ReturnType; // Type returned (by value) from operator[]
212  typedef const ResultType CompositeType; // Const-qualified ResultType
213 
// Type used to hold the matrix operand: 'const MT' or 'const MT&', selected by
// IsExpression<MT>::value (presumably the first alternative when the condition is true; confirm).
215  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
216 
// Type used to hold the vector operand; selected the same way via IsExpression<VT>::value.
218  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
219 
222 
// Type of the local 'x' created from the vector operand in the (add/sub)assign functions:
// 'const VRT' or 'VCT', selected by IsComputation<VT>::value.
224  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
225  //**********************************************************************************************
226 
227  //**Compilation flags***************************************************************************
// Compilation flag: this expression type is marked as not vectorizable (value 0).
229  enum { vectorizable = 0 };
230  //**********************************************************************************************
231 
232  //**Constructor*********************************************************************************
238  explicit inline DMatDVecMultExpr( const MT& mat, const VT& vec )
239  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
240  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
241  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
242  {
243  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
244  }
245  //**********************************************************************************************
246 
247  //**Subscript operator**************************************************************************
253  inline ReturnType operator[]( size_t index ) const {
254  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
255 
256  ElementType res;
257 
258  if( mat_.columns() != 0UL ) {
259  res = mat_(index,0UL) * vec_[0UL];
260  for( size_t j=1UL; j<end_; j+=2UL ) {
261  res += mat_(index,j) * vec_[j] + mat_(index,j+1UL) * vec_[j+1UL];
262  }
263  if( end_ < mat_.columns() ) {
264  res += mat_(index,end_) * vec_[end_];
265  }
266  }
267  else {
268  reset( res );
269  }
270 
271  return res;
272  }
273  //**********************************************************************************************
274 
275  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of rows of the matrix operand.
280  inline size_t size() const {
281  return mat_.rows();
282  }
283  //**********************************************************************************************
284 
285  //**Left function*******************************************************************************
// Returns the left-hand side dense matrix operand (mat_) as LeftOperand.
290  inline LeftOperand leftOperand() const {
291  return mat_;
292  }
293  //**********************************************************************************************
294 
295  //**Right function******************************************************************************
// Returns the right-hand side dense vector operand (vec_) as RightOperand.
300  inline RightOperand rightOperand() const {
301  return vec_;
302  }
303  //**********************************************************************************************
304 
305  //**********************************************************************************************
311  template< typename T >
312  inline bool canAlias( const T* alias ) const {
313  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
314  }
315  //**********************************************************************************************
316 
317  //**********************************************************************************************
323  template< typename T >
324  inline bool isAliased( const T* alias ) const {
325  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
326  }
327  //**********************************************************************************************
328 
329  private:
330  //**Member variables****************************************************************************
// End index of the two-columns-per-iteration loop in operator[]; initialized in the constructor.
333  const size_t end_;
334  //**********************************************************************************************
335 
336  //**Assignment to dense vectors*****************************************************************
// Assignment of the matrix/vector product 'rhs' to the dense vector 'lhs' (lhs = A * x).
//   lhs  target dense vector; its size must equal rhs.size()
//   rhs  the multiplication expression to be assigned
// NOTE(review): listing line 351 is missing from this extract, so a statement at the top of
// the body may have been dropped -- confirm against the original header.
348  template< typename VT1 > // Type of the target dense vector
349  friend inline void assign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
350  {
352 
353  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
354 
// No rows: nothing to assign. No columns: every element of the product is an empty sum, so
// the target is reset.
355  if( rhs.mat_.rows() == 0UL ) {
356  return;
357  }
358  else if( rhs.mat_.columns() == 0UL ) {
359  reset( ~lhs );
360  return;
361  }
362 
// NOTE(review): the LT typedef is not part of this listing -- confirm its definition.
363  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
364  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
365 
366  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
367  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
368  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
369  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
370 
// The default kernel is used if MT is a computation for which 'evaluate' is not set, or if
// rows*columns is below DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel overloads are tried.
371  if( ( IsComputation<MT>::value && !evaluate ) ||
372  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
373  DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
374  else
375  DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
376  }
378  //**********************************************************************************************
379 
380  //**Default assignment to dense vectors*********************************************************
// Default assignment kernel, chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold:
// passes the expression A * x to y.assign().
//   y  target vector
//   A  left-hand side matrix operand
//   x  right-hand side vector operand
394  template< typename VT1 // Type of the left-hand side target vector
395  , typename MT1 // Type of the left-hand side matrix operand
396  , typename VT2 > // Type of the right-hand side vector operand
397  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
398  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
399  {
400  y.assign( A * x );
401  }
403  //**********************************************************************************************
404 
405  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment kernel (y = A * x), chosen when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows of A are processed in blocks of 8, 4, 3 and 2, followed by at most one single row.
// Within a block the columns are traversed in steps of IT::size; each row of the block
// accumulates A.get(row,j) * x.get(j) in its own IntrinsicType variable, and sum() of that
// variable is stored in the matching element of y.
// NOTE(review): the accumulators are default-constructed and read right away -- presumably the
// default constructor of IntrinsicType zero-initializes; confirm.
// NOTE(review): the column loops have no remainder handling for N not divisible by IT::size --
// presumably the operands' storage is padded accordingly; confirm.
419  template< typename VT1 // Type of the left-hand side target vector
420  , typename MT1 // Type of the left-hand side matrix operand
421  , typename VT2 > // Type of the right-hand side vector operand
422  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
423  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
424  {
425  typedef IntrinsicTrait<ElementType> IT;
426 
427  const size_t M( A.rows() );
428  const size_t N( A.columns() );
429 
430  size_t i( 0UL );
431 
// Blocks of eight rows: the loaded part of x (x1) is shared by all eight accumulators.
432  for( ; (i+8UL) <= M; i+=8UL ) {
433  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
434  for( size_t j=0UL; j<N; j+=IT::size ) {
435  const IntrinsicType x1( x.get(j) );
436  xmm1 = xmm1 + A.get(i ,j) * x1;
437  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
438  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
439  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
440  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
441  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
442  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
443  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
444  }
445  y[i ] = sum( xmm1 );
446  y[i+1UL] = sum( xmm2 );
447  y[i+2UL] = sum( xmm3 );
448  y[i+3UL] = sum( xmm4 );
449  y[i+4UL] = sum( xmm5 );
450  y[i+5UL] = sum( xmm6 );
451  y[i+6UL] = sum( xmm7 );
452  y[i+7UL] = sum( xmm8 );
453  }
// Remaining rows: blocks of four.
454  for( ; (i+4UL) <= M; i+=4UL ) {
455  IntrinsicType xmm1, xmm2, xmm3, xmm4;
456  for( size_t j=0UL; j<N; j+=IT::size ) {
457  const IntrinsicType x1( x.get(j) );
458  xmm1 = xmm1 + A.get(i ,j) * x1;
459  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
460  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
461  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
462  }
463  y[i ] = sum( xmm1 );
464  y[i+1UL] = sum( xmm2 );
465  y[i+2UL] = sum( xmm3 );
466  y[i+3UL] = sum( xmm4 );
467  }
// Remaining rows: blocks of three.
468  for( ; (i+3UL) <= M; i+=3UL ) {
469  IntrinsicType xmm1, xmm2, xmm3;
470  for( size_t j=0UL; j<N; j+=IT::size ) {
471  const IntrinsicType x1( x.get(j) );
472  xmm1 = xmm1 + A.get(i ,j) * x1;
473  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
474  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
475  }
476  y[i ] = sum( xmm1 );
477  y[i+1UL] = sum( xmm2 );
478  y[i+2UL] = sum( xmm3 );
479  }
// Remaining rows: blocks of two.
480  for( ; (i+2UL) <= M; i+=2UL ) {
481  IntrinsicType xmm1, xmm2;
482  for( size_t j=0UL; j<N; j+=IT::size ) {
483  const IntrinsicType x1( x.get(j) );
484  xmm1 = xmm1 + A.get(i ,j) * x1;
485  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
486  }
487  y[i ] = sum( xmm1 );
488  y[i+1UL] = sum( xmm2 );
489  }
// At most one row is left at this point.
490  if( i < M ) {
491  IntrinsicType xmm1;
492  for( size_t j=0UL; j<N; j+=IT::size ) {
493  xmm1 = xmm1 + A.get(i,j) * x.get(j);
494  }
495  y[i] = sum( xmm1 );
496  }
497  }
499  //**********************************************************************************************
500 
501  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS assignment kernels, chosen when UseDefaultKernel<VT1,MT1,VT2> holds:
// forwards to selectDefaultAssignKernel() with the same arguments.
//   y  target vector
//   A  left-hand side matrix operand
//   x  right-hand side vector operand
515  template< typename VT1 // Type of the left-hand side target vector
516  , typename MT1 // Type of the left-hand side matrix operand
517  , typename VT2 > // Type of the right-hand side vector operand
518  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
519  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
520  {
521  selectDefaultAssignKernel( y, A, x );
522  }
524  //**********************************************************************************************
525 
526  //**BLAS-based assignment to dense vectors (single precision)***********************************
527 #if BLAZE_BLAS_MODE
528 
541  template< typename VT1 // Type of the left-hand side target vector
542  , typename MT1 // Type of the left-hand side matrix operand
543  , typename VT2 > // Type of the right-hand side vector operand
544  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
545  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
546  {
547  using boost::numeric_cast;
548 
549  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
550  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
551  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
552 
553  const int M ( numeric_cast<int>( A.rows() ) );
554  const int N ( numeric_cast<int>( A.columns() ) );
555  const int lda( numeric_cast<int>( A.spacing() ) );
556 
557  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
558  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
559  }
561 #endif
562  //**********************************************************************************************
563 
564  //**BLAS-based assignment to dense vectors (double precision)***********************************
565 #if BLAZE_BLAS_MODE
566 
579  template< typename VT1 // Type of the left-hand side target vector
580  , typename MT1 // Type of the left-hand side matrix operand
581  , typename VT2 > // Type of the right-hand side vector operand
582  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
583  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
584  {
585  using boost::numeric_cast;
586 
587  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
588  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
590 
591  const int M ( numeric_cast<int>( A.rows() ) );
592  const int N ( numeric_cast<int>( A.columns() ) );
593  const int lda( numeric_cast<int>( A.spacing() ) );
594 
595  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
596  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
597  }
599 #endif
600  //**********************************************************************************************
601 
602  //**BLAS-based assignment to dense vectors (single precision complex)***************************
603 #if BLAZE_BLAS_MODE
604 
617  template< typename VT1 // Type of the left-hand side target vector
618  , typename MT1 // Type of the left-hand side matrix operand
619  , typename VT2 > // Type of the right-hand side vector operand
620  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
621  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
622  {
623  using boost::numeric_cast;
624 
625  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
626  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
627  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
628  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
629  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
631 
632  const int M ( numeric_cast<int>( A.rows() ) );
633  const int N ( numeric_cast<int>( A.columns() ) );
634  const int lda( numeric_cast<int>( A.spacing() ) );
635  const complex<float> alpha( 1.0F, 0.0F );
636  const complex<float> beta ( 0.0F, 0.0F );
637 
638  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
639  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
640  }
642 #endif
643  //**********************************************************************************************
644 
645  //**BLAS-based assignment to dense vectors (double precision complex)***************************
646 #if BLAZE_BLAS_MODE
647 
660  template< typename VT1 // Type of the left-hand side target vector
661  , typename MT1 // Type of the left-hand side matrix operand
662  , typename VT2 > // Type of the right-hand side vector operand
663  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
664  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
665  {
666  using boost::numeric_cast;
667 
668  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
669  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
670  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
672  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
674 
675  const int M ( numeric_cast<int>( A.rows() ) );
676  const int N ( numeric_cast<int>( A.columns() ) );
677  const int lda( numeric_cast<int>( A.spacing() ) );
678  const complex<double> alpha( 1.0, 0.0 );
679  const complex<double> beta ( 0.0, 0.0 );
680 
681  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
682  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
683  }
685 #endif
686  //**********************************************************************************************
687 
688  //**Assignment to sparse vectors****************************************************************
// Assignment of the matrix/vector product 'rhs' to the sparse vector 'lhs': the expression is
// first evaluated into a temporary of type ResultType, which is then assigned to the target.
//   lhs  target sparse vector; its size must equal rhs.size()
//   rhs  the multiplication expression to be assigned
// NOTE(review): listing lines 703 and 705-706 are missing from this extract, so statements may
// have been dropped from the body -- confirm against the original header.
700  template< typename VT1 > // Type of the target sparse vector
701  friend inline void assign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
702  {
704 
707  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
708 
709  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
710 
711  const ResultType tmp( rhs );
712  assign( ~lhs, tmp );
713  }
715  //**********************************************************************************************
716 
717  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the matrix/vector product 'rhs' to the dense vector 'lhs'
// (lhs += A * x).
//   lhs  target dense vector; its size must equal rhs.size()
//   rhs  the multiplication expression to be added
// NOTE(review): listing line 732 is missing from this extract, so a statement at the top of
// the body may have been dropped -- confirm against the original header.
729  template< typename VT1 > // Type of the target dense vector
730  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
731  {
733 
734  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
735 
// Without rows or without columns the product adds nothing to the target.
736  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
737  return;
738  }
739 
// NOTE(review): the LT typedef is not part of this listing -- confirm its definition.
740  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
741  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
742 
743  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
744  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
745  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
746  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
747 
// The default kernel is used if MT is a computation for which 'evaluate' is not set, or if
// rows*columns is below DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel overloads are tried.
748  if( ( IsComputation<MT>::value && !evaluate ) ||
749  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
750  DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
751  else
752  DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
753  }
755  //**********************************************************************************************
756 
757  //**Default addition assignment to dense vectors************************************************
// Default addition assignment kernel, chosen when UseVectorizedDefaultKernel<VT1,MT1,VT2> does
// not hold: passes the expression A * x to y.addAssign().
//   y  target vector
//   A  left-hand side matrix operand
//   x  right-hand side vector operand
771  template< typename VT1 // Type of the left-hand side target vector
772  , typename MT1 // Type of the left-hand side matrix operand
773  , typename VT2 > // Type of the right-hand side vector operand
774  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
775  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
776  {
777  y.addAssign( A * x );
778  }
780  //**********************************************************************************************
781 
782  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel (y += A * x), chosen when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows of A are processed in blocks of 8, 4, 3 and 2, followed by at most one single row.
// Within a block the columns are traversed in steps of IT::size; each row of the block
// accumulates A.get(row,j) * x.get(j) in its own IntrinsicType variable, and sum() of that
// variable is added to the matching element of y.
// NOTE(review): the accumulators are default-constructed and read right away -- presumably the
// default constructor of IntrinsicType zero-initializes; confirm.
// NOTE(review): the column loops have no remainder handling for N not divisible by IT::size --
// presumably the operands' storage is padded accordingly; confirm.
796  template< typename VT1 // Type of the left-hand side target vector
797  , typename MT1 // Type of the left-hand side matrix operand
798  , typename VT2 > // Type of the right-hand side vector operand
799  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
800  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
801  {
802  typedef IntrinsicTrait<ElementType> IT;
803 
804  const size_t M( A.rows() );
805  const size_t N( A.columns() );
806 
807  size_t i( 0UL );
808 
// Blocks of eight rows: the loaded part of x (x1) is shared by all eight accumulators.
809  for( ; (i+8UL) <= M; i+=8UL ) {
810  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
811  for( size_t j=0UL; j<N; j+=IT::size ) {
812  const IntrinsicType x1( x.get(j) );
813  xmm1 = xmm1 + A.get(i ,j) * x1;
814  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
815  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
816  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
817  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
818  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
819  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
820  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
821  }
822  y[i ] += sum( xmm1 );
823  y[i+1UL] += sum( xmm2 );
824  y[i+2UL] += sum( xmm3 );
825  y[i+3UL] += sum( xmm4 );
826  y[i+4UL] += sum( xmm5 );
827  y[i+5UL] += sum( xmm6 );
828  y[i+6UL] += sum( xmm7 );
829  y[i+7UL] += sum( xmm8 );
830  }
// Remaining rows: blocks of four.
831  for( ; (i+4UL) <= M; i+=4UL ) {
832  IntrinsicType xmm1, xmm2, xmm3, xmm4;
833  for( size_t j=0UL; j<N; j+=IT::size ) {
834  const IntrinsicType x1( x.get(j) );
835  xmm1 = xmm1 + A.get(i ,j) * x1;
836  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
837  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
838  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
839  }
840  y[i ] += sum( xmm1 );
841  y[i+1UL] += sum( xmm2 );
842  y[i+2UL] += sum( xmm3 );
843  y[i+3UL] += sum( xmm4 );
844  }
// Remaining rows: blocks of three.
845  for( ; (i+3UL) <= M; i+=3UL ) {
846  IntrinsicType xmm1, xmm2, xmm3;
847  for( size_t j=0UL; j<N; j+=IT::size ) {
848  const IntrinsicType x1( x.get(j) );
849  xmm1 = xmm1 + A.get(i ,j) * x1;
850  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
851  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
852  }
853  y[i ] += sum( xmm1 );
854  y[i+1UL] += sum( xmm2 );
855  y[i+2UL] += sum( xmm3 );
856  }
// Remaining rows: blocks of two.
857  for( ; (i+2UL) <= M; i+=2UL ) {
858  IntrinsicType xmm1, xmm2;
859  for( size_t j=0UL; j<N; j+=IT::size ) {
860  const IntrinsicType x1( x.get(j) );
861  xmm1 = xmm1 + A.get(i ,j) * x1;
862  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
863  }
864  y[i ] += sum( xmm1 );
865  y[i+1UL] += sum( xmm2 );
866  }
// At most one row is left at this point.
867  if( i < M ) {
868  IntrinsicType xmm1;
869  for( size_t j=0UL; j<N; j+=IT::size ) {
870  xmm1 = xmm1 + A.get(i,j) * x.get(j);
871  }
872  y[i] += sum( xmm1 );
873  }
874  }
876  //**********************************************************************************************
877 
878  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback of the BLAS addition assignment kernels, chosen when UseDefaultKernel<VT1,MT1,VT2>
// holds: forwards to selectDefaultAddAssignKernel() with the same arguments.
//   y  target vector
//   A  left-hand side matrix operand
//   x  right-hand side vector operand
892  template< typename VT1 // Type of the left-hand side target vector
893  , typename MT1 // Type of the left-hand side matrix operand
894  , typename VT2 > // Type of the right-hand side vector operand
895  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
896  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
897  {
898  selectDefaultAddAssignKernel( y, A, x );
899  }
901  //**********************************************************************************************
902 
903  //**BLAS-based addition assignment to dense vectors (single precision)**************************
904 #if BLAZE_BLAS_MODE
905 
918  template< typename VT1 // Type of the left-hand side target vector
919  , typename MT1 // Type of the left-hand side matrix operand
920  , typename VT2 > // Type of the right-hand side vector operand
921  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
922  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
923  {
924  using boost::numeric_cast;
925 
926  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
927  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
928  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
929 
930  const int M ( numeric_cast<int>( A.rows() ) );
931  const int N ( numeric_cast<int>( A.columns() ) );
932  const int lda( numeric_cast<int>( A.spacing() ) );
933 
934  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
935  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
936  }
938 #endif
939  //**********************************************************************************************
940 
941  //**BLAS-based addition assignment to dense vectors (double precision)**************************
942 #if BLAZE_BLAS_MODE
943 
956  template< typename VT1 // Type of the left-hand side target vector
957  , typename MT1 // Type of the left-hand side matrix operand
958  , typename VT2 > // Type of the right-hand side vector operand
959  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
960  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
961  {
962  using boost::numeric_cast;
963 
964  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
965  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
966  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
967 
968  const int M ( numeric_cast<int>( A.rows() ) );
969  const int N ( numeric_cast<int>( A.columns() ) );
970  const int lda( numeric_cast<int>( A.spacing() ) );
971 
972  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
973  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
974  }
976 #endif
977  //**********************************************************************************************
978 
979  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
980 #if BLAZE_BLAS_MODE
981 
994  template< typename VT1 // Type of the left-hand side target vector
995  , typename MT1 // Type of the left-hand side matrix operand
996  , typename VT2 > // Type of the right-hand side vector operand
997  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
998  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
999  {
1000  using boost::numeric_cast;
1001 
1002  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1003  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1004  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1005  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1006  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1007  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1008 
1009  const int M ( numeric_cast<int>( A.rows() ) );
1010  const int N ( numeric_cast<int>( A.columns() ) );
1011  const int lda( numeric_cast<int>( A.spacing() ) );
1012  const complex<float> alpha( 1.0F, 0.0F );
1013  const complex<float> beta ( 1.0F, 0.0F );
1014 
1015  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1016  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1017  }
1019 #endif
1020  //**********************************************************************************************
1021 
1022  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1023 #if BLAZE_BLAS_MODE
1024 
1037  template< typename VT1 // Type of the left-hand side target vector
1038  , typename MT1 // Type of the left-hand side matrix operand
1039  , typename VT2 > // Type of the right-hand side vector operand
1040  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1041  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1042  {
1043  using boost::numeric_cast;
1044 
1045  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1046  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1048  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1049  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1051 
1052  const int M ( numeric_cast<int>( A.rows() ) );
1053  const int N ( numeric_cast<int>( A.columns() ) );
1054  const int lda( numeric_cast<int>( A.spacing() ) );
1055  const complex<double> alpha( 1.0, 0.0 );
1056  const complex<double> beta ( 1.0, 0.0 );
1057 
1058  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1059  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1060  }
1062 #endif
1063  //**********************************************************************************************
1064 
1065  //**Addition assignment to sparse vectors*******************************************************
1066  // No special implementation for the addition assignment to sparse vectors.
1067  //**********************************************************************************************
1068 
1069  //**Subtraction assignment to dense vectors*****************************************************
1081  template< typename VT1 > // Type of the target dense vector
1082  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1083  {
1085 
1086  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1087 
1088  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1089  return;
1090  }
1091 
1092  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1093  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1094 
1095  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1096  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1097  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1098  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1099 
1100  if( ( IsComputation<MT>::value && !evaluate ) ||
1101  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1102  DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1103  else
1104  DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1105  }
1107  //**********************************************************************************************
1108 
1109  //**Default subtraction assignment to dense vectors*********************************************
1123  template< typename VT1 // Type of the left-hand side target vector
1124  , typename MT1 // Type of the left-hand side matrix operand
1125  , typename VT2 > // Type of the right-hand side vector operand
1126  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1127  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1128  {
1129  y.subAssign( A * x );
1130  }
1132  //**********************************************************************************************
1133 
1134  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment of a dense matrix-dense vector multiplication
// (y -= A*x). Selected when UseVectorizedDefaultKernel<VT1,MT1,VT2> is satisfied.
//
// y: target dense vector, A: dense matrix operand, x: dense vector operand.
//
// The rows of A are processed in groups of 8, 4, 3 and 2, followed by at most one remaining
// row. For every row of a group one IntrinsicType accumulator collects the products of
// A.get(i,j) and x.get(j) over the columns; the accumulator is then reduced via sum() and
// subtracted from the matching element of y.
//
// NOTE(review): the accumulators are only default-constructed; the kernel presumably relies on
// IntrinsicType's default constructor yielding zero - confirm against the intrinsic types.
// NOTE(review): the column loops advance by IT::size without a remainder loop, which presumably
// requires the rows of A and the vector x to be padded to a multiple of IT::size - confirm.
1148  template< typename VT1 // Type of the left-hand side target vector
1149  , typename MT1 // Type of the left-hand side matrix operand
1150  , typename VT2 > // Type of the right-hand side vector operand
1151  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1152  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1153  {
1154  typedef IntrinsicTrait<ElementType> IT;
1155 
1156  const size_t M( A.rows() );
1157  const size_t N( A.columns() );
1158 
1159  size_t i( 0UL );
1160 
// Groups of eight rows: every loaded x1 is reused for eight row accumulators
1161  for( ; (i+8UL) <= M; i+=8UL ) {
1162  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1163  for( size_t j=0UL; j<N; j+=IT::size ) {
1164  const IntrinsicType x1( x.get(j) );
1165  xmm1 = xmm1 + A.get(i ,j) * x1;
1166  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1167  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1168  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1169  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1170  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1171  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1172  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1173  }
1174  y[i ] -= sum( xmm1 );
1175  y[i+1UL] -= sum( xmm2 );
1176  y[i+2UL] -= sum( xmm3 );
1177  y[i+3UL] -= sum( xmm4 );
1178  y[i+4UL] -= sum( xmm5 );
1179  y[i+5UL] -= sum( xmm6 );
1180  y[i+6UL] -= sum( xmm7 );
1181  y[i+7UL] -= sum( xmm8 );
1182  }
// Groups of four rows (fewer than eight rows are left at this point)
1183  for( ; (i+4UL) <= M; i+=4UL ) {
1184  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1185  for( size_t j=0UL; j<N; j+=IT::size ) {
1186  const IntrinsicType x1( x.get(j) );
1187  xmm1 = xmm1 + A.get(i ,j) * x1;
1188  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1189  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1190  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1191  }
1192  y[i ] -= sum( xmm1 );
1193  y[i+1UL] -= sum( xmm2 );
1194  y[i+2UL] -= sum( xmm3 );
1195  y[i+3UL] -= sum( xmm4 );
1196  }
// Group of three rows (fewer than four rows are left at this point)
1197  for( ; (i+3UL) <= M; i+=3UL ) {
1198  IntrinsicType xmm1, xmm2, xmm3;
1199  for( size_t j=0UL; j<N; j+=IT::size ) {
1200  const IntrinsicType x1( x.get(j) );
1201  xmm1 = xmm1 + A.get(i ,j) * x1;
1202  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1203  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1204  }
1205  y[i ] -= sum( xmm1 );
1206  y[i+1UL] -= sum( xmm2 );
1207  y[i+2UL] -= sum( xmm3 );
1208  }
// Group of two rows
1209  for( ; (i+2UL) <= M; i+=2UL ) {
1210  IntrinsicType xmm1, xmm2;
1211  for( size_t j=0UL; j<N; j+=IT::size ) {
1212  const IntrinsicType x1( x.get(j) );
1213  xmm1 = xmm1 + A.get(i ,j) * x1;
1214  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1215  }
1216  y[i ] -= sum( xmm1 );
1217  y[i+1UL] -= sum( xmm2 );
1218  }
// At most a single row remains after the grouped loops
1219  if( i < M ) {
1220  IntrinsicType xmm1;
1221  for( size_t j=0UL; j<N; j+=IT::size ) {
1222  xmm1 = xmm1 + A.get(i,j) * x.get(j);
1223  }
1224  y[i] -= sum( xmm1 );
1225  }
1226  }
1228  //**********************************************************************************************
1229 
1230  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1244  template< typename VT1 // Type of the left-hand side target vector
1245  , typename MT1 // Type of the left-hand side matrix operand
1246  , typename VT2 > // Type of the right-hand side vector operand
1247  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1248  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1249  {
1250  selectDefaultSubAssignKernel( y, A, x );
1251  }
1253  //**********************************************************************************************
1254 
1255  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1256 #if BLAZE_BLAS_MODE
1257 
1270  template< typename VT1 // Type of the left-hand side target vector
1271  , typename MT1 // Type of the left-hand side matrix operand
1272  , typename VT2 > // Type of the right-hand side vector operand
1273  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1274  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1275  {
1276  using boost::numeric_cast;
1277 
1278  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1279  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1280  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1281 
1282  const int M ( numeric_cast<int>( A.rows() ) );
1283  const int N ( numeric_cast<int>( A.columns() ) );
1284  const int lda( numeric_cast<int>( A.spacing() ) );
1285 
1286  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1287  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1288  }
1290 #endif
1291  //**********************************************************************************************
1292 
1293  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1294 #if BLAZE_BLAS_MODE
1295 
1308  template< typename VT1 // Type of the left-hand side target vector
1309  , typename MT1 // Type of the left-hand side matrix operand
1310  , typename VT2 > // Type of the right-hand side vector operand
1311  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1312  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1313  {
1314  using boost::numeric_cast;
1315 
1316  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1317  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1318  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1319 
1320  const int M ( numeric_cast<int>( A.rows() ) );
1321  const int N ( numeric_cast<int>( A.columns() ) );
1322  const int lda( numeric_cast<int>( A.spacing() ) );
1323 
1324  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1325  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1326  }
1328 #endif
1329  //**********************************************************************************************
1330 
1331  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1332 #if BLAZE_BLAS_MODE
1333 
1346  template< typename VT1 // Type of the left-hand side target vector
1347  , typename MT1 // Type of the left-hand side matrix operand
1348  , typename VT2 > // Type of the right-hand side vector operand
1349  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1350  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1351  {
1352  using boost::numeric_cast;
1353 
1354  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1355  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1356  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1357  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1358  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1359  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1360 
1361  const int M ( numeric_cast<int>( A.rows() ) );
1362  const int N ( numeric_cast<int>( A.columns() ) );
1363  const int lda( numeric_cast<int>( A.spacing() ) );
1364  const complex<float> alpha( -1.0F, 0.0F );
1365  const complex<float> beta ( 1.0F, 0.0F );
1366 
1367  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1368  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1369  }
1371 #endif
1372  //**********************************************************************************************
1373 
1374  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1375 #if BLAZE_BLAS_MODE
1376 
1389  template< typename VT1 // Type of the left-hand side target vector
1390  , typename MT1 // Type of the left-hand side matrix operand
1391  , typename VT2 > // Type of the right-hand side vector operand
1392  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1393  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1394  {
1395  using boost::numeric_cast;
1396 
1397  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1398  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1399  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1400  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1401  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1402  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1403 
1404  const int M ( numeric_cast<int>( A.rows() ) );
1405  const int N ( numeric_cast<int>( A.columns() ) );
1406  const int lda( numeric_cast<int>( A.spacing() ) );
1407  const complex<double> alpha( -1.0, 0.0 );
1408  const complex<double> beta ( 1.0, 0.0 );
1409 
1410  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1411  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1412  }
1414 #endif
1415  //**********************************************************************************************
1416 
1417  //**Subtraction assignment to sparse vectors****************************************************
1418  // No special implementation for the subtraction assignment to sparse vectors.
1419  //**********************************************************************************************
1420 
1421  //**Multiplication assignment to dense vectors**************************************************
1433  template< typename VT1 > // Type of the target dense vector
1434  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1435  {
1437 
1440  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1441 
1442  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1443 
1444  const ResultType tmp( rhs );
1445  multAssign( ~lhs, tmp );
1446  }
1448  //**********************************************************************************************
1449 
1450  //**Multiplication assignment to sparse vectors*************************************************
1451  // No special implementation for the multiplication assignment to sparse vectors.
1452  //**********************************************************************************************
1453 
1454  //**Compile time checks*************************************************************************
1461  //**********************************************************************************************
1462 };
1463 //*************************************************************************************************
1464 
1465 
1466 
1467 
1468 //=================================================================================================
1469 //
1470 // DVECSCALARMULTEXPR SPECIALIZATION
1471 //
1472 //=================================================================================================
1473 
1474 //*************************************************************************************************
1482 template< typename MT // Type of the left-hand side dense matrix
1483  , typename VT // Type of the right-hand side dense vector
1484  , typename ST > // Type of the side scalar value
1485 class DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >
1486  : public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1487  , private Expression
1488  , private Computation
1489 {
1490  private:
1491  //**Type definitions****************************************************************************
// Private shorthand types derived from the matrix type MT and the vector type VT.
1492  typedef DMatDVecMultExpr<MT,VT> MVM; //!< Type of the dense matrix-dense vector multiplication expression.
1493  typedef typename MVM::ResultType RES; //!< Result type of the dense matrix-dense vector multiplication expression.
1494  typedef typename MT::ResultType MRT; //!< Result type of the left-hand side dense matrix expression.
1495  typedef typename VT::ResultType VRT; //!< Result type of the right-hand side dense vector expression.
1496  typedef typename MRT::ElementType MET; //!< Element type of the left-hand side dense matrix expression.
1497  typedef typename VRT::ElementType VET; //!< Element type of the right-hand side dense vector expression.
1498  typedef typename MT::CompositeType MCT; //!< Composite type of the left-hand side dense matrix expression.
1499  typedef typename VT::CompositeType VCT; //!< Composite type of the right-hand side dense vector expression.
1500  //**********************************************************************************************
1501 
1502  //**********************************************************************************************
// Compilation switch feeding the LT typedef and the kernel dispatch in assign(): set only if
// the matrix type is a non-vectorizable computation and matrix and vector share the same,
// BLAS compatible element type.
1504  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1505  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1506  //**********************************************************************************************
1507 
1508  //**********************************************************************************************
1510 
1513  template< typename T1, typename T2, typename T3, typename T4 >
1514  struct UseSinglePrecisionKernel {
1515  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1516  IsFloat<typename T1::ElementType>::value &&
1517  IsFloat<typename T2::ElementType>::value &&
1518  IsFloat<typename T3::ElementType>::value &&
1519  !IsComplex<T4>::value };
1520  };
1521  //**********************************************************************************************
1522 
1523  //**********************************************************************************************
1525 
1528  template< typename T1, typename T2, typename T3, typename T4 >
1529  struct UseDoublePrecisionKernel {
1530  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1531  IsDouble<typename T1::ElementType>::value &&
1532  IsDouble<typename T2::ElementType>::value &&
1533  IsDouble<typename T3::ElementType>::value &&
1534  !IsComplex<T4>::value };
1535  };
1536  //**********************************************************************************************
1537 
1538  //**********************************************************************************************
1540 
1543  template< typename T1, typename T2, typename T3 >
1544  struct UseSinglePrecisionComplexKernel {
1545  typedef complex<float> Type;
1546  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1547  IsSame<typename T1::ElementType,Type>::value &&
1548  IsSame<typename T2::ElementType,Type>::value &&
1549  IsSame<typename T3::ElementType,Type>::value };
1550  };
1551  //**********************************************************************************************
1552 
1553  //**********************************************************************************************
1555 
1558  template< typename T1, typename T2, typename T3 >
1559  struct UseDoublePrecisionComplexKernel {
1560  typedef complex<double> Type;
1561  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1562  IsSame<typename T1::ElementType,Type>::value &&
1563  IsSame<typename T2::ElementType,Type>::value &&
1564  IsSame<typename T3::ElementType,Type>::value };
1565  };
1566  //**********************************************************************************************
1567 
1568  //**********************************************************************************************
1570 
1572  template< typename T1, typename T2, typename T3, typename T4 >
1573  struct UseDefaultKernel {
1574  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1575  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1576  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1577  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1578  };
1579  //**********************************************************************************************
1580 
1581  //**********************************************************************************************
1583 
1586  template< typename T1, typename T2, typename T3, typename T4 >
1587  struct UseVectorizedDefaultKernel {
1588  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1589  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1590  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1591  IsSame<typename T1::ElementType,T4>::value &&
1592  IntrinsicTrait<typename T1::ElementType>::addition &&
1593  IntrinsicTrait<typename T1::ElementType>::multiplication };
1594  };
1595  //**********************************************************************************************
1596 
1597  public:
1598  //**Type definitions****************************************************************************
// Public type definitions of the scaled dense matrix-dense vector multiplication expression.
1599  typedef DVecScalarMultExpr<MVM,ST,false> This; //!< Type of this DVecScalarMultExpr instance.
1600  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type for expression template evaluations.
1601  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type for expression template evaluations.
1602  typedef typename ResultType::ElementType ElementType; //!< Resulting element type.
1603  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Resulting intrinsic element type.
1604  typedef const ElementType ReturnType; //!< Return type of the subscript operator.
1605  typedef const ResultType CompositeType; //!< Data type for composite expression templates.
1606 
// Type of the left-hand side operand: the wrapped matrix-vector multiplication expression.
1608  typedef const DMatDVecMultExpr<MT,VT> LeftOperand;
1609 
// Type in which the right-hand side scalar operand is stored (see member scalar_).
1611  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1612 
// Type used by the assignment kernels to bind the dense matrix operand (switched by 'evaluate').
1614  typedef typename SelectType< evaluate, const MRT, MCT >::Type LT;
1615 
// Type used by the assignment kernels to bind the dense vector operand.
1617  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
1618  //**********************************************************************************************
1619 
1620  //**Compilation flags***************************************************************************
// Compilation flag for intrinsic optimization: fixed to 0 for this expression.
1622  enum { vectorizable = 0 };
1623  //**********************************************************************************************
1624 
1625  //**Constructor*********************************************************************************
1631  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1632  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1633  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1634  {}
1635  //**********************************************************************************************
1636 
1637  //**Subscript operator**************************************************************************
1643  inline ReturnType operator[]( size_t index ) const {
1644  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1645  return vector_[index] * scalar_;
1646  }
1647  //**********************************************************************************************
1648 
1649  //**Size function*******************************************************************************
1654  inline size_t size() const {
1655  return vector_.size();
1656  }
1657  //**********************************************************************************************
1658 
1659  //**Left operand access*************************************************************************
1664  inline LeftOperand leftOperand() const {
1665  return vector_;
1666  }
1667  //**********************************************************************************************
1668 
1669  //**Right operand access************************************************************************
1674  inline RightOperand rightOperand() const {
1675  return scalar_;
1676  }
1677  //**********************************************************************************************
1678 
1679  //**********************************************************************************************
1685  template< typename T >
1686  inline bool canAlias( const T* alias ) const {
1687  return vector_.canAlias( alias );
1688  }
1689  //**********************************************************************************************
1690 
1691  //**********************************************************************************************
1697  template< typename T >
1698  inline bool isAliased( const T* alias ) const {
1699  return vector_.isAliased( alias );
1700  }
1701  //**********************************************************************************************
1702 
1703  private:
1704  //**Member variables****************************************************************************
1705  LeftOperand vector_; //!< Left-hand side dense matrix-dense vector multiplication expression.
1706  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
1707  //**********************************************************************************************
1708 
1709  //**Assignment to dense vectors*****************************************************************
1721  template< typename VT1 > // Type of the target dense vector
1722  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1723  {
1725 
1726  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1727 
1728  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1729  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1730 
1731  if( left.rows() == 0UL ) {
1732  return;
1733  }
1734  else if( left.columns() == 0UL ) {
1735  reset( ~lhs );
1736  return;
1737  }
1738 
1739  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1740  RT x( right ); // Evaluation of the right-hand side dense vector operand
1741 
1742  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1743  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1744  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1745  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1746 
1747  if( ( IsComputation<MT>::value && !evaluate ) ||
1748  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1749  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1750  else
1751  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1752  }
1753  //**********************************************************************************************
1754 
1755  //**Default assignment to dense vectors*********************************************************
1769  template< typename VT1 // Type of the left-hand side target vector
1770  , typename MT1 // Type of the left-hand side matrix operand
1771  , typename VT2 // Type of the right-hand side vector operand
1772  , typename ST2 > // Type of the scalar value
1773  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1774  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1775  {
1776  y.assign( A * x * scalar );
1777  }
1778  //**********************************************************************************************
1779 
1780  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment of a scaled dense matrix-dense vector multiplication
// (y = s*A*x). Selected when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is satisfied.
//
// y: target dense vector, A: dense matrix operand, x: dense vector operand,
// scalar: scaling factor applied to every reduced row result.
//
// The rows of A are processed in groups of 8, 4, 3 and 2, followed by at most one remaining
// row. For every row of a group one IntrinsicType accumulator collects the products of
// A.get(i,j) and x.get(j) over the columns; the accumulator is then reduced via sum(),
// multiplied by scalar and stored in the matching element of y.
//
// NOTE(review): the accumulators are only default-constructed; the kernel presumably relies on
// IntrinsicType's default constructor yielding zero - confirm against the intrinsic types.
// NOTE(review): the column loops advance by IT::size without a remainder loop, which presumably
// requires the rows of A and the vector x to be padded to a multiple of IT::size - confirm.
1794  template< typename VT1 // Type of the left-hand side target vector
1795  , typename MT1 // Type of the left-hand side matrix operand
1796  , typename VT2 // Type of the right-hand side vector operand
1797  , typename ST2 > // Type of the scalar value
1798  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1799  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1800  {
1801  typedef IntrinsicTrait<ElementType> IT;
1802 
1803  const size_t M( A.rows() );
1804  const size_t N( A.columns() );
1805 
1806  size_t i( 0UL );
1807 
// Groups of eight rows: every loaded x1 is reused for eight row accumulators
1808  for( ; (i+8UL) <= M; i+=8UL ) {
1809  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1810  for( size_t j=0UL; j<N; j+=IT::size ) {
1811  const IntrinsicType x1( x.get(j) );
1812  xmm1 = xmm1 + A.get(i ,j) * x1;
1813  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1814  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1815  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1816  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1817  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1818  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1819  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1820  }
1821  y[i ] = sum( xmm1 ) * scalar;
1822  y[i+1UL] = sum( xmm2 ) * scalar;
1823  y[i+2UL] = sum( xmm3 ) * scalar;
1824  y[i+3UL] = sum( xmm4 ) * scalar;
1825  y[i+4UL] = sum( xmm5 ) * scalar;
1826  y[i+5UL] = sum( xmm6 ) * scalar;
1827  y[i+6UL] = sum( xmm7 ) * scalar;
1828  y[i+7UL] = sum( xmm8 ) * scalar;
1829  }
// Groups of four rows (fewer than eight rows are left at this point)
1830  for( ; (i+4UL) <= M; i+=4UL ) {
1831  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1832  for( size_t j=0UL; j<N; j+=IT::size ) {
1833  const IntrinsicType x1( x.get(j) );
1834  xmm1 = xmm1 + A.get(i ,j) * x1;
1835  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1836  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1837  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1838  }
1839  y[i ] = sum( xmm1 ) * scalar;
1840  y[i+1UL] = sum( xmm2 ) * scalar;
1841  y[i+2UL] = sum( xmm3 ) * scalar;
1842  y[i+3UL] = sum( xmm4 ) * scalar;
1843  }
// Group of three rows (fewer than four rows are left at this point)
1844  for( ; (i+3UL) <= M; i+=3UL ) {
1845  IntrinsicType xmm1, xmm2, xmm3;
1846  for( size_t j=0UL; j<N; j+=IT::size ) {
1847  const IntrinsicType x1( x.get(j) );
1848  xmm1 = xmm1 + A.get(i ,j) * x1;
1849  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1850  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1851  }
1852  y[i ] = sum( xmm1 ) * scalar;
1853  y[i+1UL] = sum( xmm2 ) * scalar;
1854  y[i+2UL] = sum( xmm3 ) * scalar;
1855  }
// Group of two rows
1856  for( ; (i+2UL) <= M; i+=2UL ) {
1857  IntrinsicType xmm1, xmm2;
1858  for( size_t j=0UL; j<N; j+=IT::size ) {
1859  const IntrinsicType x1( x.get(j) );
1860  xmm1 = xmm1 + A.get(i ,j) * x1;
1861  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1862  }
1863  y[i ] = sum( xmm1 ) * scalar;
1864  y[i+1UL] = sum( xmm2 ) * scalar;
1865  }
// At most a single row remains after the grouped loops
1866  if( i < M ) {
1867  IntrinsicType xmm1;
1868  for( size_t j=0UL; j<N; j+=IT::size ) {
1869  xmm1 = xmm1 + A.get(i,j) * x.get(j);
1870  }
1871  y[i] = sum( xmm1 ) * scalar;
1872  }
1873  }
1874  //**********************************************************************************************
1875 
1876  //**BLAS-based assignment to dense vectors (default)********************************************
1890  template< typename VT1 // Type of the left-hand side target vector
1891  , typename MT1 // Type of the left-hand side matrix operand
1892  , typename VT2 // Type of the right-hand side vector operand
1893  , typename ST2 > // Type of the scalar value
1894  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1895  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1896  {
1897  selectDefaultAssignKernel( y, A, x, scalar );
1898  }
1899  //**********************************************************************************************
1900 
1901  //**BLAS-based assignment to dense vectors (single precision)***********************************
1902 #if BLAZE_BLAS_MODE
1903 
1916  template< typename VT1 // Type of the left-hand side target vector
1917  , typename MT1 // Type of the left-hand side matrix operand
1918  , typename VT2 // Type of the right-hand side vector operand
1919  , typename ST2 > // Type of the scalar value
1920  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1921  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1922  {
1923  using boost::numeric_cast;
1924 
1925  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1926  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1927  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1928 
1929  const int M ( numeric_cast<int>( A.rows() ) );
1930  const int N ( numeric_cast<int>( A.columns() ) );
1931  const int lda( numeric_cast<int>( A.spacing() ) );
1932 
1933  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1934  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1935  }
1936 #endif
1937  //**********************************************************************************************
1938 
1939  //**BLAS-based assignment to dense vectors (double precision)***********************************
1940 #if BLAZE_BLAS_MODE
1941 
1954  template< typename VT1 // Type of the left-hand side target vector
1955  , typename MT1 // Type of the left-hand side matrix operand
1956  , typename VT2 // Type of the right-hand side vector operand
1957  , typename ST2 > // Type of the scalar value
1958  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1959  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1960  {
1961  using boost::numeric_cast;
1962 
1963  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1964  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1965  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1966 
1967  const int M ( numeric_cast<int>( A.rows() ) );
1968  const int N ( numeric_cast<int>( A.columns() ) );
1969  const int lda( numeric_cast<int>( A.spacing() ) );
1970 
1971  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1972  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
1973  }
1974 #endif
1975  //**********************************************************************************************
1976 
1977  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1978 #if BLAZE_BLAS_MODE
1979 
1992  template< typename VT1 // Type of the left-hand side target vector
1993  , typename MT1 // Type of the left-hand side matrix operand
1994  , typename VT2 // Type of the right-hand side vector operand
1995  , typename ST2 > // Type of the scalar value
1996  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1997  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1998  {
1999  using boost::numeric_cast;
2000 
2001  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2002  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2003  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2005  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2006  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2007  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2008 
2009  const int M ( numeric_cast<int>( A.rows() ) );
2010  const int N ( numeric_cast<int>( A.columns() ) );
2011  const int lda( numeric_cast<int>( A.spacing() ) );
2012  const complex<float> alpha( scalar );
2013  const complex<float> beta ( 0.0F, 0.0F );
2014 
2015  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2016  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2017  }
2018 #endif
2019  //**********************************************************************************************
2020 
2021  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2022 #if BLAZE_BLAS_MODE
2023 
2036  template< typename VT1 // Type of the left-hand side target vector
2037  , typename MT1 // Type of the left-hand side matrix operand
2038  , typename VT2 // Type of the right-hand side vector operand
2039  , typename ST2 > // Type of the scalar value
2040  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2041  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2042  {
2043  using boost::numeric_cast;
2044 
2045  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2046  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2049  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2051  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2052 
2053  const int M ( numeric_cast<int>( A.rows() ) );
2054  const int N ( numeric_cast<int>( A.columns() ) );
2055  const int lda( numeric_cast<int>( A.spacing() ) );
2056  const complex<double> alpha( scalar );
2057  const complex<double> beta ( 0.0, 0.0 );
2058 
2059  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2060  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2061  }
2062 #endif
2063  //**********************************************************************************************
2064 
2065  //**Assignment to sparse vectors****************************************************************
2076  template< typename VT1 > // Type of the target sparse vector
2077  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2078  {
2080 
2083  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2084 
2085  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2086 
2087  const ResultType tmp( rhs );
2088  assign( ~lhs, tmp );
2089  }
2090  //**********************************************************************************************
2091 
2092  //**Addition assignment to dense vectors********************************************************
2104  template< typename VT1 > // Type of the target dense vector
2105  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2106  {
2108 
2109  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2110 
2111  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2112  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2113 
2114  if( left.rows() == 0UL || left.columns() == 0UL ) {
2115  return;
2116  }
2117 
2118  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2119  RT x( right ); // Evaluation of the right-hand side dense vector operand
2120 
2121  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2122  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2123  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2124  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2125 
2126  if( ( IsComputation<MT>::value && !evaluate ) ||
2127  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2128  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2129  else
2130  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2131  }
2132  //**********************************************************************************************
2133 
2134  //**Default addition assignment to dense vectors************************************************
2148  template< typename VT1 // Type of the left-hand side target vector
2149  , typename MT1 // Type of the left-hand side matrix operand
2150  , typename VT2 // Type of the right-hand side vector operand
2151  , typename ST2 > // Type of the scalar value
2152  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2153  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2154  {
2155  y.addAssign( A * x * scalar );
2156  }
2157  //**********************************************************************************************
2158 
2159  //**Vectorized default addition assignment to dense vectors*************************************
2173  template< typename VT1 // Type of the left-hand side target vector
2174  , typename MT1 // Type of the left-hand side matrix operand
2175  , typename VT2 // Type of the right-hand side vector operand
2176  , typename ST2 > // Type of the scalar value
2177  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2178  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2179  {
2180  typedef IntrinsicTrait<ElementType> IT;
2181 
2182  const size_t M( A.rows() );
2183  const size_t N( A.columns() );
2184 
2185  size_t i( 0UL );
2186 
2187  for( ; (i+8UL) <= M; i+=8UL ) {
2188  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2189  for( size_t j=0UL; j<N; j+=IT::size ) {
2190  const IntrinsicType x1( x.get(j) );
2191  xmm1 = xmm1 + A.get(i ,j) * x1;
2192  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2193  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2194  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2195  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2196  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2197  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2198  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
2199  }
2200  y[i ] += sum( xmm1 ) * scalar;
2201  y[i+1UL] += sum( xmm2 ) * scalar;
2202  y[i+2UL] += sum( xmm3 ) * scalar;
2203  y[i+3UL] += sum( xmm4 ) * scalar;
2204  y[i+4UL] += sum( xmm5 ) * scalar;
2205  y[i+5UL] += sum( xmm6 ) * scalar;
2206  y[i+6UL] += sum( xmm7 ) * scalar;
2207  y[i+7UL] += sum( xmm8 ) * scalar;
2208  }
2209  for( ; (i+4UL) <= M; i+=4UL ) {
2210  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2211  for( size_t j=0UL; j<N; j+=IT::size ) {
2212  const IntrinsicType x1( x.get(j) );
2213  xmm1 = xmm1 + A.get(i ,j) * x1;
2214  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2215  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2216  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2217  }
2218  y[i ] += sum( xmm1 ) * scalar;
2219  y[i+1UL] += sum( xmm2 ) * scalar;
2220  y[i+2UL] += sum( xmm3 ) * scalar;
2221  y[i+3UL] += sum( xmm4 ) * scalar;
2222  }
2223  for( ; (i+3UL) <= M; i+=3UL ) {
2224  IntrinsicType xmm1, xmm2, xmm3;
2225  for( size_t j=0UL; j<N; j+=IT::size ) {
2226  const IntrinsicType x1( x.get(j) );
2227  xmm1 = xmm1 + A.get(i ,j) * x1;
2228  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2229  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2230  }
2231  y[i ] += sum( xmm1 ) * scalar;
2232  y[i+1UL] += sum( xmm2 ) * scalar;
2233  y[i+2UL] += sum( xmm3 ) * scalar;
2234  }
2235  for( ; (i+2UL) <= M; i+=2UL ) {
2236  IntrinsicType xmm1, xmm2;
2237  for( size_t j=0UL; j<N; j+=IT::size ) {
2238  const IntrinsicType x1( x.get(j) );
2239  xmm1 = xmm1 + A.get(i ,j) * x1;
2240  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2241  }
2242  y[i ] += sum( xmm1 ) * scalar;
2243  y[i+1UL] += sum( xmm2 ) * scalar;
2244  }
2245  if( i < M ) {
2246  IntrinsicType xmm1;
2247  for( size_t j=0UL; j<N; j+=IT::size ) {
2248  xmm1 = xmm1 + A.get(i,j) * x.get(j);
2249  }
2250  y[i] += sum( xmm1 ) * scalar;
2251  }
2252  }
2253  //**********************************************************************************************
2254 
2255  //**BLAS-based addition assignment to dense vectors (default)***********************************
2269  template< typename VT1 // Type of the left-hand side target vector
2270  , typename MT1 // Type of the left-hand side matrix operand
2271  , typename VT2 // Type of the right-hand side vector operand
2272  , typename ST2 > // Type of the scalar value
2273  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2274  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2275  {
2276  selectDefaultAddAssignKernel( y, A, x, scalar );
2277  }
2278  //**********************************************************************************************
2279 
2280  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2281 #if BLAZE_BLAS_MODE
2282 
2295  template< typename VT1 // Type of the left-hand side target vector
2296  , typename MT1 // Type of the left-hand side matrix operand
2297  , typename VT2 // Type of the right-hand side vector operand
2298  , typename ST2 > // Type of the scalar value
2299  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2300  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2301  {
2302  using boost::numeric_cast;
2303 
2304  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2305  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2306  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2307 
2308  const int M ( numeric_cast<int>( A.rows() ) );
2309  const int N ( numeric_cast<int>( A.columns() ) );
2310  const int lda( numeric_cast<int>( A.spacing() ) );
2311 
2312  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2313  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2314  }
2315 #endif
2316  //**********************************************************************************************
2317 
2318  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2319 #if BLAZE_BLAS_MODE
2320 
2333  template< typename VT1 // Type of the left-hand side target vector
2334  , typename MT1 // Type of the left-hand side matrix operand
2335  , typename VT2 // Type of the right-hand side vector operand
2336  , typename ST2 > // Type of the scalar value
2337  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2338  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2339  {
2340  using boost::numeric_cast;
2341 
2342  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2343  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2344  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2345 
2346  const int M ( numeric_cast<int>( A.rows() ) );
2347  const int N ( numeric_cast<int>( A.columns() ) );
2348  const int lda( numeric_cast<int>( A.spacing() ) );
2349 
2350  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2351  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2352  }
2353 #endif
2354  //**********************************************************************************************
2355 
2356  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2357 #if BLAZE_BLAS_MODE
2358 
2371  template< typename VT1 // Type of the left-hand side target vector
2372  , typename MT1 // Type of the left-hand side matrix operand
2373  , typename VT2 // Type of the right-hand side vector operand
2374  , typename ST2 > // Type of the scalar value
2375  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2376  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2377  {
2378  using boost::numeric_cast;
2379 
2380  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2381  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2382  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2384  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2385  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2386  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2387 
2388  const int M ( numeric_cast<int>( A.rows() ) );
2389  const int N ( numeric_cast<int>( A.columns() ) );
2390  const int lda( numeric_cast<int>( A.spacing() ) );
2391  const complex<float> alpha( scalar );
2392  const complex<float> beta ( 1.0F, 0.0F );
2393 
2394  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2395  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2396  }
2397 #endif
2398  //**********************************************************************************************
2399 
2400  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2401 #if BLAZE_BLAS_MODE
2402 
2415  template< typename VT1 // Type of the left-hand side target vector
2416  , typename MT1 // Type of the left-hand side matrix operand
2417  , typename VT2 // Type of the right-hand side vector operand
2418  , typename ST2 > // Type of the scalar value
2419  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2420  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2421  {
2422  using boost::numeric_cast;
2423 
2424  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2425  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2426  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2428  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2429  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2430  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2431 
2432  const int M ( numeric_cast<int>( A.rows() ) );
2433  const int N ( numeric_cast<int>( A.columns() ) );
2434  const int lda( numeric_cast<int>( A.spacing() ) );
2435  const complex<double> alpha( scalar );
2436  const complex<double> beta ( 1.0, 0.0 );
2437 
2438  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2439  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2440  }
2441 #endif
2442  //**********************************************************************************************
2443 
2444  //**Addition assignment to sparse vectors*******************************************************
2445  // No special implementation for the addition assignment to sparse vectors.
2446  //**********************************************************************************************
2447 
2448  //**Subtraction assignment to dense vectors*****************************************************
2460  template< typename VT1 > // Type of the target dense vector
2461  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2462  {
2464 
2465  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2466 
2467  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2468  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2469 
2470  if( left.rows() == 0UL || left.columns() == 0UL ) {
2471  return;
2472  }
2473 
2474  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2475  RT x( right ); // Evaluation of the right-hand side dense vector operand
2476 
2477  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2478  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2479  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2480  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2481 
2482  if( ( IsComputation<MT>::value && !evaluate ) ||
2483  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2484  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2485  else
2486  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2487  }
2488  //**********************************************************************************************
2489 
2490  //**Default subtraction assignment to dense vectors*********************************************
2504  template< typename VT1 // Type of the left-hand side target vector
2505  , typename MT1 // Type of the left-hand side matrix operand
2506  , typename VT2 // Type of the right-hand side vector operand
2507  , typename ST2 > // Type of the scalar value
2508  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2509  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2510  {
2511  y.subAssign( A * x * scalar );
2512  }
2513  //**********************************************************************************************
2514 
2515  //**Vectorized default subtraction assignment to dense vectors**********************************
2529  template< typename VT1 // Type of the left-hand side target vector
2530  , typename MT1 // Type of the left-hand side matrix operand
2531  , typename VT2 // Type of the right-hand side vector operand
2532  , typename ST2 > // Type of the scalar value
2533  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2534  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2535  {
2536  typedef IntrinsicTrait<ElementType> IT;
2537 
2538  const size_t M( A.rows() );
2539  const size_t N( A.columns() );
2540 
2541  size_t i( 0UL );
2542 
2543  for( ; (i+8UL) <= M; i+=8UL ) {
2544  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2545  for( size_t j=0UL; j<N; j+=IT::size ) {
2546  const IntrinsicType x1( x.get(j) );
2547  xmm1 = xmm1 + A.get(i ,j) * x1;
2548  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2549  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2550  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2551  xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2552  xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2553  xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2554  xmm8 = xmm8 + A.get(i+7UL,j) * x1;
2555  }
2556  y[i ] -= sum( xmm1 ) * scalar;
2557  y[i+1UL] -= sum( xmm2 ) * scalar;
2558  y[i+2UL] -= sum( xmm3 ) * scalar;
2559  y[i+3UL] -= sum( xmm4 ) * scalar;
2560  y[i+4UL] -= sum( xmm5 ) * scalar;
2561  y[i+5UL] -= sum( xmm6 ) * scalar;
2562  y[i+6UL] -= sum( xmm7 ) * scalar;
2563  y[i+7UL] -= sum( xmm8 ) * scalar;
2564  }
2565  for( ; (i+4UL) <= M; i+=4UL ) {
2566  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2567  for( size_t j=0UL; j<N; j+=IT::size ) {
2568  const IntrinsicType x1( x.get(j) );
2569  xmm1 = xmm1 + A.get(i ,j) * x1;
2570  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2571  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2572  xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2573  }
2574  y[i ] -= sum( xmm1 ) * scalar;
2575  y[i+1UL] -= sum( xmm2 ) * scalar;
2576  y[i+2UL] -= sum( xmm3 ) * scalar;
2577  y[i+3UL] -= sum( xmm4 ) * scalar;
2578  }
2579  for( ; (i+3UL) <= M; i+=3UL ) {
2580  IntrinsicType xmm1, xmm2, xmm3;
2581  for( size_t j=0UL; j<N; j+=IT::size ) {
2582  const IntrinsicType x1( x.get(j) );
2583  xmm1 = xmm1 + A.get(i ,j) * x1;
2584  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2585  xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2586  }
2587  y[i ] -= sum( xmm1 ) * scalar;
2588  y[i+1UL] -= sum( xmm2 ) * scalar;
2589  y[i+2UL] -= sum( xmm3 ) * scalar;
2590  }
2591  for( ; (i+2UL) <= M; i+=2UL ) {
2592  IntrinsicType xmm1, xmm2;
2593  for( size_t j=0UL; j<N; j+=IT::size ) {
2594  const IntrinsicType x1( x.get(j) );
2595  xmm1 = xmm1 + A.get(i ,j) * x1;
2596  xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2597  }
2598  y[i ] -= sum( xmm1 ) * scalar;
2599  y[i+1UL] -= sum( xmm2 ) * scalar;
2600  }
2601  if( i < M ) {
2602  IntrinsicType xmm1;
2603  for( size_t j=0UL; j<N; j+=IT::size ) {
2604  xmm1 = xmm1 + A.get(i,j) * x.get(j);
2605  }
2606  y[i] -= sum( xmm1 ) * scalar;
2607  }
2608  }
2609  //**********************************************************************************************
2610 
2611  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2625  template< typename VT1 // Type of the left-hand side target vector
2626  , typename MT1 // Type of the left-hand side matrix operand
2627  , typename VT2 // Type of the right-hand side vector operand
2628  , typename ST2 > // Type of the scalar value
2629  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2630  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2631  {
2632  selectDefaultSubAssignKernel( y, A, x, scalar );
2633  }
2634  //**********************************************************************************************
2635 
2636  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2637 #if BLAZE_BLAS_MODE
2638 
2651  template< typename VT1 // Type of the left-hand side target vector
2652  , typename MT1 // Type of the left-hand side matrix operand
2653  , typename VT2 // Type of the right-hand side vector operand
2654  , typename ST2 > // Type of the scalar value
2655  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2656  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2657  {
2658  using boost::numeric_cast;
2659 
2660  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2661  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2662  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2663 
2664  const int M ( numeric_cast<int>( A.rows() ) );
2665  const int N ( numeric_cast<int>( A.columns() ) );
2666  const int lda( numeric_cast<int>( A.spacing() ) );
2667 
2668  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2669  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2670  }
2671 #endif
2672  //**********************************************************************************************
2673 
2674  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2675 #if BLAZE_BLAS_MODE
2676 
2689  template< typename VT1 // Type of the left-hand side target vector
2690  , typename MT1 // Type of the left-hand side matrix operand
2691  , typename VT2 // Type of the right-hand side vector operand
2692  , typename ST2 > // Type of the scalar value
2693  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2694  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2695  {
2696  using boost::numeric_cast;
2697 
2698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2700  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2701 
2702  const int M ( numeric_cast<int>( A.rows() ) );
2703  const int N ( numeric_cast<int>( A.columns() ) );
2704  const int lda( numeric_cast<int>( A.spacing() ) );
2705 
2706  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2707  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2708  }
2709 #endif
2710  //**********************************************************************************************
2711 
2712  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2713 #if BLAZE_BLAS_MODE
2714 
2727  template< typename VT1 // Type of the left-hand side target vector
2728  , typename MT1 // Type of the left-hand side matrix operand
2729  , typename VT2 // Type of the right-hand side vector operand
2730  , typename ST2 > // Type of the scalar value
2731  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2732  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2733  {
2734  using boost::numeric_cast;
2735 
2736  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2737  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2738  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2740  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2741  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2742  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2743 
2744  const int M ( numeric_cast<int>( A.rows() ) );
2745  const int N ( numeric_cast<int>( A.columns() ) );
2746  const int lda( numeric_cast<int>( A.spacing() ) );
2747  const complex<float> alpha( -scalar );
2748  const complex<float> beta ( 1.0F, 0.0F );
2749 
2750  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2751  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2752  }
2753 #endif
2754  //**********************************************************************************************
2755 
2756  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2757 #if BLAZE_BLAS_MODE
2758 
2771  template< typename VT1 // Type of the left-hand side target vector
2772  , typename MT1 // Type of the left-hand side matrix operand
2773  , typename VT2 // Type of the right-hand side vector operand
2774  , typename ST2 > // Type of the scalar value
2775  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2776  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2777  {
2778  using boost::numeric_cast;
2779 
2780  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2781  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2782  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2784  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2785  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2786  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2787 
2788  const int M ( numeric_cast<int>( A.rows() ) );
2789  const int N ( numeric_cast<int>( A.columns() ) );
2790  const int lda( numeric_cast<int>( A.spacing() ) );
2791  const complex<double> alpha( -scalar );
2792  const complex<double> beta ( 1.0, 0.0 );
2793 
2794  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2795  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2796  }
2797 #endif
2798  //**********************************************************************************************
2799 
2800  //**Subtraction assignment to sparse vectors****************************************************
2801  // No special implementation for the subtraction assignment to sparse vectors.
2802  //**********************************************************************************************
2803 
2804  //**Multiplication assignment to dense vectors**************************************************
2816  template< typename VT1 > // Type of the target dense vector
2817  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2818  {
2820 
2823  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2824 
2825  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2826 
2827  const ResultType tmp( rhs );
2828  multAssign( ~lhs, tmp );
2829  }
2830  //**********************************************************************************************
2831 
2832  //**Multiplication assignment to sparse vectors*************************************************
2833  // No special implementation for the multiplication assignment to sparse vectors.
2834  //**********************************************************************************************
2835 
2836  //**Compile time checks*************************************************************************
2844  //**********************************************************************************************
2845 };
2847 //*************************************************************************************************
2848 
2849 
2850 
2851 
2852 //=================================================================================================
2853 //
2854 // GLOBAL BINARY ARITHMETIC OPERATORS
2855 //
2856 //=================================================================================================
2857 
2858 //*************************************************************************************************
2888 template< typename T1 // Type of the left-hand side dense matrix
2889  , typename T2 > // Type of the right-hand side dense vector
2890 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2892 {
2894 
2895  if( (~mat).columns() != (~vec).size() )
2896  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2897 
2898  return DMatDVecMultExpr<T1,T2>( ~mat, ~vec );
2899 }
2900 //*************************************************************************************************
2901 
2902 
2903 
2904 
2905 //=================================================================================================
2906 //
2907 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
2908 //
2909 //=================================================================================================
2910 
2911 //*************************************************************************************************
2924 template< typename T1 // Type of the left-hand side dense matrix
2925  , bool SO // Storage order of the left-hand side dense matrix
2926  , typename T2 > // Type of the right-hand side dense vector
2927 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2929 {
2931 
2932  return (~mat).leftOperand() * ( (~mat).rightOperand() * vec );
2933 }
2934 //*************************************************************************************************
2935 
2936 } // namespace blaze
2937 
2938 #endif