All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
47 #include <blaze/system/BLAS.h>
49 #include <blaze/util/Assert.h>
50 #include <blaze/util/Complex.h>
55 #include <blaze/util/DisableIf.h>
56 #include <blaze/util/EnableIf.h>
58 #include <blaze/util/SelectType.h>
59 #include <blaze/util/Types.h>
65 
66 
67 namespace blaze {
68 
69 //=================================================================================================
70 //
71 // CLASS TDMATDVECMULTEXPR
72 //
73 //=================================================================================================
74 
75 //*************************************************************************************************
82 template< typename MT // Type of the left-hand side dense matrix
83  , typename VT > // Type of the right-hand side dense vector
84 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
85  , private Expression
86  , private Computation
87 {
88  private:
89  //**Type definitions****************************************************************************
90  typedef typename MT::ResultType MRT;
91  typedef typename VT::ResultType VRT;
92  typedef typename MRT::ElementType MET;
93  typedef typename VRT::ElementType VET;
94  typedef typename MT::CompositeType MCT;
95  typedef typename VT::CompositeType VCT;
96  //**********************************************************************************************
97 
98  //**********************************************************************************************
100  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
102  //**********************************************************************************************
103 
104  //**********************************************************************************************
106 
107 
110  template< typename T1, typename T2, typename T3 >
111  struct UseSinglePrecisionKernel {
112  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
116  };
118  //**********************************************************************************************
119 
120  //**********************************************************************************************
122 
123 
126  template< typename T1, typename T2, typename T3 >
127  struct UseDoublePrecisionKernel {
128  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
129  IsDouble<typename T1::ElementType>::value &&
130  IsDouble<typename T2::ElementType>::value &&
131  IsDouble<typename T3::ElementType>::value };
132  };
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
138 
139 
142  template< typename T1, typename T2, typename T3 >
143  struct UseSinglePrecisionComplexKernel {
144  typedef complex<float> Type;
145  enum { value = IsSame<typename T1::ElementType,Type>::value &&
146  IsSame<typename T2::ElementType,Type>::value &&
147  IsSame<typename T3::ElementType,Type>::value };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
155 
158  template< typename T1, typename T2, typename T3 >
159  struct UseDoublePrecisionComplexKernel {
160  typedef complex<double> Type;
161  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
162  IsSame<typename T1::ElementType,Type>::value &&
163  IsSame<typename T2::ElementType,Type>::value &&
164  IsSame<typename T3::ElementType,Type>::value };
165  };
167  //**********************************************************************************************
168 
169  //**********************************************************************************************
171 
172 
174  template< typename T1, typename T2, typename T3 >
175  struct UseDefaultKernel {
176  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
177  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
178  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
179  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
180  };
182  //**********************************************************************************************
183 
184  //**********************************************************************************************
186 
187 
190  template< typename T1, typename T2, typename T3 >
191  struct UseVectorizedDefaultKernel {
192  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
193  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
194  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
195  IntrinsicTrait<typename T1::ElementType>::addition &&
196  IntrinsicTrait<typename T1::ElementType>::multiplication };
197  };
199  //**********************************************************************************************
200 
201  public:
202  //**Type definitions****************************************************************************
205  typedef typename ResultType::TransposeType TransposeType;
206  typedef typename ResultType::ElementType ElementType;
208  typedef const ElementType ReturnType;
209  typedef const ResultType CompositeType;
210 
212  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
213 
215  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
216 
219 
221  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
222  //**********************************************************************************************
223 
224  //**Compilation flags***************************************************************************
226  enum { vectorizable = 0 };
227  //**********************************************************************************************
228 
229  //**Constructor*********************************************************************************
235  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
236  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
237  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
238  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
239  {
240  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
241  }
242  //**********************************************************************************************
243 
244  //**Subscript operator**************************************************************************
250  inline ReturnType operator[]( size_t index ) const {
251  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
252 
253  ElementType res;
254 
255  if( mat_.columns() != 0UL ) {
256  res = mat_(index,0UL) * vec_[0UL];
257  for( size_t j=1UL; j<end_; j+=2UL ) {
258  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
259  }
260  if( end_ < mat_.columns() ) {
261  res += mat_(index,end_) * vec_[end_];
262  }
263  }
264  else {
265  reset( res );
266  }
267 
268  return res;
269  }
270  //**********************************************************************************************
271 
272  //**Size function*******************************************************************************
277  inline size_t size() const {
278  return mat_.rows();
279  }
280  //**********************************************************************************************
281 
282  //**Left function*******************************************************************************
287  inline LeftOperand leftOperand() const {
288  return mat_;
289  }
290  //**********************************************************************************************
291 
292  //**Right function******************************************************************************
297  inline RightOperand rightOperand() const {
298  return vec_;
299  }
300  //**********************************************************************************************
301 
302  //**********************************************************************************************
308  template< typename T >
309  inline bool canAlias( const T* alias ) const {
310  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
311  }
312  //**********************************************************************************************
313 
314  //**********************************************************************************************
320  template< typename T >
321  inline bool isAliased( const T* alias ) const {
322  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
323  }
324  //**********************************************************************************************
325 
326  private:
327  //**Member variables****************************************************************************
330  const size_t end_;
331  //**********************************************************************************************
332 
333  //**Assignment to dense vectors*****************************************************************
345  template< typename VT1 > // Type of the target dense vector
346  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
347  {
349 
350  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
351 
352  if( rhs.mat_.rows() == 0UL ) {
353  return;
354  }
355  else if( rhs.mat_.columns() == 0UL ) {
356  reset( ~lhs );
357  return;
358  }
359 
360  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
361  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
362 
363  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
364  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
365  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
366  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
367 
368  if( ( IsComputation<MT>::value && !evaluate ) ||
369  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
370  TDMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
371  else
372  TDMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
373  }
375  //**********************************************************************************************
376 
377  //**Default assignment to dense vectors*********************************************************
391  template< typename VT1 // Type of the left-hand side target vector
392  , typename MT1 // Type of the left-hand side matrix operand
393  , typename VT2 > // Type of the right-hand side vector operand
394  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
395  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
396  {
397  const size_t M( A.rows() );
398  const size_t N( A.columns() );
399 
400  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
401  const size_t iend( M & size_t(-2) );
402 
403  for( size_t i=0UL; i<M; ++i ) {
404  y[i] = x[0UL] * A(i,0UL);
405  }
406  for( size_t j=1UL; j<N; ++j ) {
407  for( size_t i=0UL; i<iend; i+=2UL ) {
408  y[i ] += x[j] * A(i ,j);
409  y[i+1UL] += x[j] * A(i+1UL,j);
410  }
411  if( iend < M ) {
412  y[iend] += x[j] * A(iend,j);
413  }
414  }
415  }
417  //**********************************************************************************************
418 
419  //**Vectorized default assignment to dense vectors**********************************************
433  template< typename VT1 // Type of the left-hand side target vector
434  , typename MT1 // Type of the left-hand side matrix operand
435  , typename VT2 > // Type of the right-hand side vector operand
436  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
437  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
438  {
439  typedef IntrinsicTrait<ElementType> IT;
440 
441  const size_t M( A.spacing() );
442  const size_t N( A.columns() );
443 
444  size_t i( 0UL );
445 
446  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
447  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
448  for( size_t j=0UL; j<N; ++j ) {
449  const IntrinsicType x1( set( x[j] ) );
450  xmm1 = xmm1 + A.get(i ,j) * x1;
451  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
452  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
453  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
454  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
455  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
456  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
457  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
458  }
459  store( &y[i ], xmm1 );
460  store( &y[i+IT::size ], xmm2 );
461  store( &y[i+IT::size*2UL], xmm3 );
462  store( &y[i+IT::size*3UL], xmm4 );
463  store( &y[i+IT::size*4UL], xmm5 );
464  store( &y[i+IT::size*5UL], xmm6 );
465  store( &y[i+IT::size*6UL], xmm7 );
466  store( &y[i+IT::size*7UL], xmm8 );
467  }
468  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
469  IntrinsicType xmm1, xmm2, xmm3, xmm4;
470  for( size_t j=0UL; j<N; ++j ) {
471  const IntrinsicType x1( set( x[j] ) );
472  xmm1 = xmm1 + A.get(i ,j) * x1;
473  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
474  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
475  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
476  }
477  store( &y[i ], xmm1 );
478  store( &y[i+IT::size ], xmm2 );
479  store( &y[i+IT::size*2UL], xmm3 );
480  store( &y[i+IT::size*3UL], xmm4 );
481  }
482  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
483  IntrinsicType xmm1, xmm2, xmm3;
484  for( size_t j=0UL; j<N; ++j ) {
485  const IntrinsicType x1( set( x[j] ) );
486  xmm1 = xmm1 + A.get(i ,j) * x1;
487  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
488  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
489  }
490  store( &y[i ], xmm1 );
491  store( &y[i+IT::size ], xmm2 );
492  store( &y[i+IT::size*2UL], xmm3 );
493  }
494  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
495  IntrinsicType xmm1, xmm2;
496  for( size_t j=0UL; j<N; ++j ) {
497  const IntrinsicType x1( set( x[j] ) );
498  xmm1 = xmm1 + A.get(i ,j) * x1;
499  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
500  }
501  store( &y[i ], xmm1 );
502  store( &y[i+IT::size], xmm2 );
503  }
504  if( i < M ) {
505  IntrinsicType xmm1;
506  for( size_t j=0UL; j<N; ++j ) {
507  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
508  }
509  store( &y[i], xmm1 );
510  }
511  }
513  //**********************************************************************************************
514 
515  //**BLAS-based assignment to dense vectors (default)********************************************
529  template< typename VT1 // Type of the left-hand side target vector
530  , typename MT1 // Type of the left-hand side matrix operand
531  , typename VT2 > // Type of the right-hand side vector operand
532  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
533  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
534  {
535  selectDefaultAssignKernel( y, A, x );
536  }
538  //**********************************************************************************************
539 
540  //**BLAS-based assignment to dense vectors (single precision)***********************************
541 #if BLAZE_BLAS_MODE
542 
555  template< typename VT1 // Type of the left-hand side target vector
556  , typename MT1 // Type of the left-hand side matrix operand
557  , typename VT2 > // Type of the right-hand side vector operand
558  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
559  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
560  {
561  using boost::numeric_cast;
562 
563  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
564  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
565  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
566 
567  const int M ( numeric_cast<int>( A.rows() ) );
568  const int N ( numeric_cast<int>( A.columns() ) );
569  const int lda( numeric_cast<int>( A.spacing() ) );
570 
571  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
572  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
573  }
575 #endif
576  //**********************************************************************************************
577 
578  //**BLAS-based assignment to dense vectors (double precision)***********************************
579 #if BLAZE_BLAS_MODE
580 
593  template< typename VT1 // Type of the left-hand side target vector
594  , typename MT1 // Type of the left-hand side matrix operand
595  , typename VT2 > // Type of the right-hand side vector operand
596  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
597  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
598  {
599  using boost::numeric_cast;
600 
601  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
602  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
603  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
604 
605  const int M ( numeric_cast<int>( A.rows() ) );
606  const int N ( numeric_cast<int>( A.columns() ) );
607  const int lda( numeric_cast<int>( A.spacing() ) );
608 
609  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
610  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
611  }
613 #endif
614  //**********************************************************************************************
615 
616  //**BLAS-based assignment to dense vectors (single precision complex)***************************
617 #if BLAZE_BLAS_MODE
618 
631  template< typename VT1 // Type of the left-hand side target vector
632  , typename MT1 // Type of the left-hand side matrix operand
633  , typename VT2 > // Type of the right-hand side vector operand
634  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
635  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
636  {
637  using boost::numeric_cast;
638 
639  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
640  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
641  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
642  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
643  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
644  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
645 
646  const int M ( numeric_cast<int>( A.rows() ) );
647  const int N ( numeric_cast<int>( A.columns() ) );
648  const int lda( numeric_cast<int>( A.spacing() ) );
649  const complex<float> alpha( 1.0F, 0.0F );
650  const complex<float> beta ( 0.0F, 0.0F );
651 
652  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
653  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
654  }
656 #endif
657  //**********************************************************************************************
658 
659  //**BLAS-based assignment to dense vectors (double precision complex)***************************
660 #if BLAZE_BLAS_MODE
661 
674  template< typename VT1 // Type of the left-hand side target vector
675  , typename MT1 // Type of the left-hand side matrix operand
676  , typename VT2 > // Type of the right-hand side vector operand
677  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
678  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
679  {
680  using boost::numeric_cast;
681 
682  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
683  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
684  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
685  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
686  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
687  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
688 
689  const int M ( numeric_cast<int>( A.rows() ) );
690  const int N ( numeric_cast<int>( A.columns() ) );
691  const int lda( numeric_cast<int>( A.spacing() ) );
692  const complex<double> alpha( 1.0, 0.0 );
693  const complex<double> beta ( 0.0, 0.0 );
694 
695  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
696  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
697  }
699 #endif
700  //**********************************************************************************************
701 
702  //**Assignment to sparse vectors****************************************************************
714  template< typename VT1 > // Type of the target sparse vector
715  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
716  {
718 
721  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
722 
723  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
724 
725  const ResultType tmp( rhs );
726  assign( ~lhs, tmp );
727  }
729  //**********************************************************************************************
730 
731  //**Addition assignment to dense vectors********************************************************
744  template< typename VT1 > // Type of the target dense vector
745  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
746  {
748 
749  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
750 
751  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
752  return;
753  }
754 
755  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
756  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
757 
758  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
759  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
760  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
761  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
762 
763  if( ( IsComputation<MT>::value && !evaluate ) ||
764  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
765  TDMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
766  else
767  TDMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
768  }
770  //**********************************************************************************************
771 
772  //**Default addition assignment to dense vectors************************************************
786  template< typename VT1 // Type of the left-hand side target vector
787  , typename MT1 // Type of the left-hand side matrix operand
788  , typename VT2 > // Type of the right-hand side vector operand
789  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
790  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
791  {
792  const size_t M( A.rows() );
793  const size_t N( A.columns() );
794 
795  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
796  const size_t iend( M & size_t(-2) );
797 
798  for( size_t j=0UL; j<N; ++j ) {
799  for( size_t i=0UL; i<iend; i+=2UL ) {
800  y[i ] += x[j] * A(i ,j);
801  y[i+1UL] += x[j] * A(i+1UL,j);
802  }
803  if( iend < M ) {
804  y[iend] += x[j] * A(iend,j);
805  }
806  }
807  }
809  //**********************************************************************************************
810 
811  //**Vectorized default addition assignment to dense vectors*************************************
825  template< typename VT1 // Type of the left-hand side target vector
826  , typename MT1 // Type of the left-hand side matrix operand
827  , typename VT2 > // Type of the right-hand side vector operand
828  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
829  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
830  {
831  typedef IntrinsicTrait<ElementType> IT;
832 
833  const size_t M( A.spacing() );
834  const size_t N( A.columns() );
835 
836  size_t i( 0UL );
837 
838  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
839  IntrinsicType xmm1( load( &y[i ] ) );
840  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
841  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
842  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
843  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
844  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
845  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
846  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
847  for( size_t j=0UL; j<N; ++j ) {
848  const IntrinsicType x1( set( x[j] ) );
849  xmm1 = xmm1 + A.get(i ,j) * x1;
850  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
851  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
852  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
853  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
854  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
855  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
856  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
857  }
858  store( &y[i ], xmm1 );
859  store( &y[i+IT::size ], xmm2 );
860  store( &y[i+IT::size*2UL], xmm3 );
861  store( &y[i+IT::size*3UL], xmm4 );
862  store( &y[i+IT::size*4UL], xmm5 );
863  store( &y[i+IT::size*5UL], xmm6 );
864  store( &y[i+IT::size*6UL], xmm7 );
865  store( &y[i+IT::size*7UL], xmm8 );
866  }
867  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
868  IntrinsicType xmm1( load( &y[i ] ) );
869  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
870  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
871  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
872  for( size_t j=0UL; j<N; ++j ) {
873  const IntrinsicType x1( set( x[j] ) );
874  xmm1 = xmm1 + A.get(i ,j) * x1;
875  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
876  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
877  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
878  }
879  store( &y[i ], xmm1 );
880  store( &y[i+IT::size ], xmm2 );
881  store( &y[i+IT::size*2UL], xmm3 );
882  store( &y[i+IT::size*3UL], xmm4 );
883  }
884  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
885  IntrinsicType xmm1( load( &y[i ] ) );
886  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
887  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
888  for( size_t j=0UL; j<N; ++j ) {
889  const IntrinsicType x1( set( x[j] ) );
890  xmm1 = xmm1 + A.get(i ,j) * x1;
891  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
892  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
893  }
894  store( &y[i ], xmm1 );
895  store( &y[i+IT::size ], xmm2 );
896  store( &y[i+IT::size*2UL], xmm3 );
897  }
898  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
899  IntrinsicType xmm1( load( &y[i ] ) );
900  IntrinsicType xmm2( load( &y[i+IT::size] ) );
901  for( size_t j=0UL; j<N; ++j ) {
902  const IntrinsicType x1( set( x[j] ) );
903  xmm1 = xmm1 + A.get(i ,j) * x1;
904  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
905  }
906  store( &y[i ], xmm1 );
907  store( &y[i+IT::size], xmm2 );
908  }
909  if( i < M ) {
910  IntrinsicType xmm1( load( &y[i] ) );
911  for( size_t j=0UL; j<N; ++j ) {
912  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
913  }
914  store( &y[i], xmm1 );
915  }
916  }
918  //**********************************************************************************************
919 
920  //**BLAS-based addition assignment to dense vectors (default)***********************************
934  template< typename VT1 // Type of the left-hand side target vector
935  , typename MT1 // Type of the left-hand side matrix operand
936  , typename VT2 > // Type of the right-hand side vector operand
937  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
938  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
939  {
940  selectDefaultAddAssignKernel( y, A, x );
941  }
943  //**********************************************************************************************
944 
945  //**BLAS-based addition assignment to dense vectors (single precision)**************************
946 #if BLAZE_BLAS_MODE
947 
960  template< typename VT1 // Type of the left-hand side target vector
961  , typename MT1 // Type of the left-hand side matrix operand
962  , typename VT2 > // Type of the right-hand side vector operand
963  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
964  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
965  {
966  using boost::numeric_cast;
967 
968  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
969  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
970  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
971 
972  const int M ( numeric_cast<int>( A.rows() ) );
973  const int N ( numeric_cast<int>( A.columns() ) );
974  const int lda( numeric_cast<int>( A.spacing() ) );
975 
976  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
977  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
978  }
980 #endif
981  //**********************************************************************************************
982 
983  //**BLAS-based addition assignment to dense vectors (double precision)**************************
984 #if BLAZE_BLAS_MODE
985 
998  template< typename VT1 // Type of the left-hand side target vector
999  , typename MT1 // Type of the left-hand side matrix operand
1000  , typename VT2 > // Type of the right-hand side vector operand
1001  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1002  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1003  {
1004  using boost::numeric_cast;
1005 
1006  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1007  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1008  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1009 
1010  const int M ( numeric_cast<int>( A.rows() ) );
1011  const int N ( numeric_cast<int>( A.columns() ) );
1012  const int lda( numeric_cast<int>( A.spacing() ) );
1013 
1014  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1015  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1016  }
1018 #endif
1019  //**********************************************************************************************
1020 
1021  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1022 #if BLAZE_BLAS_MODE
1023 
1036  template< typename VT1 // Type of the left-hand side target vector
1037  , typename MT1 // Type of the left-hand side matrix operand
1038  , typename VT2 > // Type of the right-hand side vector operand
1039  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1040  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1041  {
1042  using boost::numeric_cast;
1043 
1044  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1045  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1046  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1047  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1048  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1049  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1050 
1051  const int M ( numeric_cast<int>( A.rows() ) );
1052  const int N ( numeric_cast<int>( A.columns() ) );
1053  const int lda( numeric_cast<int>( A.spacing() ) );
1054  const complex<float> alpha( 1.0F, 0.0F );
1055  const complex<float> beta ( 1.0F, 0.0F );
1056 
1057  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1058  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1059  }
1061 #endif
1062  //**********************************************************************************************
1063 
1064  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1065 #if BLAZE_BLAS_MODE
1066 
1079  template< typename VT1 // Type of the left-hand side target vector
1080  , typename MT1 // Type of the left-hand side matrix operand
1081  , typename VT2 > // Type of the right-hand side vector operand
1082  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1083  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1084  {
1085  using boost::numeric_cast;
1086 
1087  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1088  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1089  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1090  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1091  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1092  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1093 
1094  const int M ( numeric_cast<int>( A.rows() ) );
1095  const int N ( numeric_cast<int>( A.columns() ) );
1096  const int lda( numeric_cast<int>( A.spacing() ) );
1097  const complex<double> alpha( 1.0, 0.0 );
1098  const complex<double> beta ( 1.0, 0.0 );
1099 
1100  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1101  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1102  }
1104 #endif
1105  //**********************************************************************************************
1106 
1107  //**Addition assignment to sparse vectors*******************************************************
1108  // No special implementation for the addition assignment to sparse vectors.
1109  //**********************************************************************************************
1110 
1111  //**Subtraction assignment to dense vectors*****************************************************
1124  template< typename VT1 > // Type of the target dense vector
1125  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1126  {
1128 
1129  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1130 
1131  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1132  return;
1133  }
1134 
1135  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1136  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1137 
1138  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1139  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1140  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1141  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1142 
1143  if( ( IsComputation<MT>::value && !evaluate ) ||
1144  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1145  TDMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1146  else
1147  TDMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1148  }
1150  //**********************************************************************************************
1151 
1152  //**Default subtraction assignment to dense vectors*********************************************
1166  template< typename VT1 // Type of the left-hand side target vector
1167  , typename MT1 // Type of the left-hand side matrix operand
1168  , typename VT2 > // Type of the right-hand side vector operand
1169  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1170  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1171  {
1172  const size_t M( A.rows() );
1173  const size_t N( A.columns() );
1174 
1175  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1176  const size_t iend( M & size_t(-2) );
1177 
1178  for( size_t j=0UL; j<N; ++j ) {
1179  for( size_t i=0UL; i<iend; i+=2UL ) {
1180  y[i ] -= x[j] * A(i ,j);
1181  y[i+1UL] -= x[j] * A(i+1UL,j);
1182  }
1183  if( iend < M ) {
1184  y[iend] -= x[j] * A(iend,j);
1185  }
1186  }
1187  }
1189  //**********************************************************************************************
1190 
1191  //**Vectorized default subtraction assignment to dense vectors**********************************
1205  template< typename VT1 // Type of the left-hand side target vector
1206  , typename MT1 // Type of the left-hand side matrix operand
1207  , typename VT2 > // Type of the right-hand side vector operand
1208  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1209  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1210  {
1211  typedef IntrinsicTrait<ElementType> IT;
1212 
1213  const size_t M( A.spacing() );
1214  const size_t N( A.columns() );
1215 
1216  size_t i( 0UL );
1217 
1218  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1219  IntrinsicType xmm1( load( &y[i ] ) );
1220  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1221  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1222  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1223  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
1224  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
1225  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
1226  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
1227  for( size_t j=0UL; j<N; ++j ) {
1228  const IntrinsicType x1( set( x[j] ) );
1229  xmm1 = xmm1 - A.get(i ,j) * x1;
1230  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1231  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1232  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1233  xmm5 = xmm5 - A.get(i+IT::size*4UL,j) * x1;
1234  xmm6 = xmm6 - A.get(i+IT::size*5UL,j) * x1;
1235  xmm7 = xmm7 - A.get(i+IT::size*6UL,j) * x1;
1236  xmm8 = xmm8 - A.get(i+IT::size*7UL,j) * x1;
1237  }
1238  store( &y[i ], xmm1 );
1239  store( &y[i+IT::size ], xmm2 );
1240  store( &y[i+IT::size*2UL], xmm3 );
1241  store( &y[i+IT::size*3UL], xmm4 );
1242  store( &y[i+IT::size*4UL], xmm5 );
1243  store( &y[i+IT::size*5UL], xmm6 );
1244  store( &y[i+IT::size*6UL], xmm7 );
1245  store( &y[i+IT::size*7UL], xmm8 );
1246  }
1247  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1248  IntrinsicType xmm1( load( &y[i ] ) );
1249  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1250  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1251  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1252  for( size_t j=0UL; j<N; ++j ) {
1253  const IntrinsicType x1( set( x[j] ) );
1254  xmm1 = xmm1 - A.get(i ,j) * x1;
1255  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1256  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1257  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1258  }
1259  store( &y[i ], xmm1 );
1260  store( &y[i+IT::size ], xmm2 );
1261  store( &y[i+IT::size*2UL], xmm3 );
1262  store( &y[i+IT::size*3UL], xmm4 );
1263  }
1264  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
1265  IntrinsicType xmm1( load( &y[i ] ) );
1266  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1267  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1268  for( size_t j=0UL; j<N; ++j ) {
1269  const IntrinsicType x1( set( x[j] ) );
1270  xmm1 = xmm1 - A.get(i ,j) * x1;
1271  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1272  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1273  }
1274  store( &y[i ], xmm1 );
1275  store( &y[i+IT::size ], xmm2 );
1276  store( &y[i+IT::size*2UL], xmm3 );
1277  }
1278  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1279  IntrinsicType xmm1( load( &y[i ] ) );
1280  IntrinsicType xmm2( load( &y[i+IT::size] ) );
1281  for( size_t j=0UL; j<N; ++j ) {
1282  const IntrinsicType x1( set( x[j] ) );
1283  xmm1 = xmm1 - A.get(i ,j) * x1;
1284  xmm2 = xmm2 - A.get(i+IT::size,j) * x1;
1285  }
1286  store( &y[i ], xmm1 );
1287  store( &y[i+IT::size], xmm2 );
1288  }
1289  if( i < M ) {
1290  IntrinsicType xmm1( load( &y[i] ) );
1291  for( size_t j=0UL; j<N; ++j ) {
1292  xmm1 = xmm1 - A.get(i,j) * set( x[j] );
1293  }
1294  store( &y[i], xmm1 );
1295  }
1296  }
1298  //**********************************************************************************************
1299 
1300  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1314  template< typename VT1 // Type of the left-hand side target vector
1315  , typename MT1 // Type of the left-hand side matrix operand
1316  , typename VT2 > // Type of the right-hand side vector operand
1317  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1318  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1319  {
1320  selectDefaultSubAssignKernel( y, A, x );
1321  }
1323  //**********************************************************************************************
1324 
1325  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1326 #if BLAZE_BLAS_MODE
1327 
1340  template< typename VT1 // Type of the left-hand side target vector
1341  , typename MT1 // Type of the left-hand side matrix operand
1342  , typename VT2 > // Type of the right-hand side vector operand
1343  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1344  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1345  {
1346  using boost::numeric_cast;
1347 
1348  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1349  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1350  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1351 
1352  const int M ( numeric_cast<int>( A.rows() ) );
1353  const int N ( numeric_cast<int>( A.columns() ) );
1354  const int lda( numeric_cast<int>( A.spacing() ) );
1355 
1356  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1357  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1358  }
1360 #endif
1361  //**********************************************************************************************
1362 
1363  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1364 #if BLAZE_BLAS_MODE
1365 
1378  template< typename VT1 // Type of the left-hand side target vector
1379  , typename MT1 // Type of the left-hand side matrix operand
1380  , typename VT2 > // Type of the right-hand side vector operand
1381  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1382  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1383  {
1384  using boost::numeric_cast;
1385 
1386  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1387  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1388  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1389 
1390  const int M ( numeric_cast<int>( A.rows() ) );
1391  const int N ( numeric_cast<int>( A.columns() ) );
1392  const int lda( numeric_cast<int>( A.spacing() ) );
1393 
1394  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1395  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1396  }
1398 #endif
1399  //**********************************************************************************************
1400 
1401  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1402 #if BLAZE_BLAS_MODE
1403 
1416  template< typename VT1 // Type of the left-hand side target vector
1417  , typename MT1 // Type of the left-hand side matrix operand
1418  , typename VT2 > // Type of the right-hand side vector operand
1419  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1420  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1421  {
1422  using boost::numeric_cast;
1423 
1424  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1425  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1426  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1427  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1428  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1429  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1430 
1431  const int M ( numeric_cast<int>( A.rows() ) );
1432  const int N ( numeric_cast<int>( A.columns() ) );
1433  const int lda( numeric_cast<int>( A.spacing() ) );
1434  const complex<float> alpha( -1.0F, 0.0F );
1435  const complex<float> beta ( 1.0F, 0.0F );
1436 
1437  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1438  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1439  }
1441 #endif
1442  //**********************************************************************************************
1443 
1444  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1445 #if BLAZE_BLAS_MODE
1446 
1459  template< typename VT1 // Type of the left-hand side target vector
1460  , typename MT1 // Type of the left-hand side matrix operand
1461  , typename VT2 > // Type of the right-hand side vector operand
1462  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1463  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1464  {
1465  using boost::numeric_cast;
1466 
1467  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1468  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1469  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1470  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1471  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1472  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1473 
1474  const int M ( numeric_cast<int>( A.rows() ) );
1475  const int N ( numeric_cast<int>( A.columns() ) );
1476  const int lda( numeric_cast<int>( A.spacing() ) );
1477  const complex<double> alpha( -1.0, 0.0 );
1478  const complex<double> beta ( 1.0, 0.0 );
1479 
1480  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1481  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1482  }
1484 #endif
1485  //**********************************************************************************************
1486 
1487  //**Subtraction assignment to sparse vectors****************************************************
1488  // No special implementation for the subtraction assignment to sparse vectors.
1489  //**********************************************************************************************
1490 
1491  //**Multiplication assignment to dense vectors**************************************************
1504  template< typename VT1 > // Type of the target dense vector
1505  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1506  {
1508 
1511  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1512 
1513  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1514 
1515  const ResultType tmp( rhs );
1516  multAssign( ~lhs, tmp );
1517  }
1519  //**********************************************************************************************
1520 
1521  //**Multiplication assignment to sparse vectors*************************************************
1522  // No special implementation for the multiplication assignment to sparse vectors.
1523  //**********************************************************************************************
1524 
1525  //**Compile time checks*************************************************************************
1532  //**********************************************************************************************
1533 };
1534 //*************************************************************************************************
1535 
1536 
1537 
1538 
1539 //=================================================================================================
1540 //
1541 // DVECSCALARMULTEXPR SPECIALIZATION
1542 //
1543 //=================================================================================================
1544 
1545 //*************************************************************************************************
1554 template< typename MT // Type of the left-hand side dense matrix
1555  , typename VT // Type of the right-hand side dense vector
1556  , typename ST > // Type of the side scalar value
1557 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1558  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1559  , private Expression
1560  , private Computation
1561 {
1562  private:
1563  //**Type definitions****************************************************************************
1564  typedef TDMatDVecMultExpr<MT,VT> MVM;
1565  typedef typename MVM::ResultType RES;
1566  typedef typename MT::ResultType MRT;
1567  typedef typename VT::ResultType VRT;
1568  typedef typename MRT::ElementType MET;
1569  typedef typename VRT::ElementType VET;
1570  typedef typename MT::CompositeType MCT;
1571  typedef typename VT::CompositeType VCT;
1572  //**********************************************************************************************
1573 
1574  //**********************************************************************************************
1576  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1577  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1578  //**********************************************************************************************
1579 
1580  //**********************************************************************************************
1582 
1585  template< typename T1, typename T2, typename T3, typename T4 >
1586  struct UseSinglePrecisionKernel {
1587  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1588  IsFloat<typename T1::ElementType>::value &&
1589  IsFloat<typename T2::ElementType>::value &&
1590  IsFloat<typename T3::ElementType>::value &&
1591  !IsComplex<T4>::value };
1592  };
1593  //**********************************************************************************************
1594 
1595  //**********************************************************************************************
1597 
1600  template< typename T1, typename T2, typename T3, typename T4 >
1601  struct UseDoublePrecisionKernel {
1602  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1603  IsDouble<typename T1::ElementType>::value &&
1604  IsDouble<typename T2::ElementType>::value &&
1605  IsDouble<typename T3::ElementType>::value &&
1606  !IsComplex<T4>::value };
1607  };
1608  //**********************************************************************************************
1609 
1610  //**********************************************************************************************
1612 
1615  template< typename T1, typename T2, typename T3 >
1616  struct UseSinglePrecisionComplexKernel {
1617  typedef complex<float> Type;
1618  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1619  IsSame<typename T1::ElementType,Type>::value &&
1620  IsSame<typename T2::ElementType,Type>::value &&
1621  IsSame<typename T3::ElementType,Type>::value };
1622  };
1623  //**********************************************************************************************
1624 
1625  //**********************************************************************************************
1627 
1630  template< typename T1, typename T2, typename T3 >
1631  struct UseDoublePrecisionComplexKernel {
1632  typedef complex<double> Type;
1633  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1634  IsSame<typename T1::ElementType,Type>::value &&
1635  IsSame<typename T2::ElementType,Type>::value &&
1636  IsSame<typename T3::ElementType,Type>::value };
1637  };
1638  //**********************************************************************************************
1639 
1640  //**********************************************************************************************
1642 
1644  template< typename T1, typename T2, typename T3, typename T4 >
1645  struct UseDefaultKernel {
1646  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1647  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1648  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1649  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1650  };
1651  //**********************************************************************************************
1652 
1653  //**********************************************************************************************
1655 
1658  template< typename T1, typename T2, typename T3, typename T4 >
1659  struct UseVectorizedDefaultKernel {
1660  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1661  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1662  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1663  IsSame<typename T1::ElementType,T4>::value &&
1664  IntrinsicTrait<typename T1::ElementType>::addition &&
1665  IntrinsicTrait<typename T1::ElementType>::multiplication };
1666  };
1667  //**********************************************************************************************
1668 
1669  public:
1670  //**Type definitions****************************************************************************
1671  typedef DVecScalarMultExpr<MVM,ST,false> This;
1672  typedef typename MultTrait<RES,ST>::Type ResultType;
1673  typedef typename ResultType::TransposeType TransposeType;
1674  typedef typename ResultType::ElementType ElementType;
1675  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
1676  typedef const ElementType ReturnType;
1677  typedef const ResultType CompositeType;
1678 
1680  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
1681 
1683  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1684 
1686  typedef typename SelectType< evaluate, const MRT, MCT >::Type LT;
1687 
1689  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
1690  //**********************************************************************************************
1691 
1692  //**Compilation flags***************************************************************************
//! Compilation switch for the expression template evaluation strategy.
/*! The flag is fixed to 0 for this expression specialization. */
enum { vectorizable = 0 };
1695  //**********************************************************************************************
1696 
1697  //**Constructor*********************************************************************************
1703  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1704  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1705  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1706  {}
1707  //**********************************************************************************************
1708 
1709  //**Subscript operator**************************************************************************
1715  inline ReturnType operator[]( size_t index ) const {
1716  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1717  return vector_[index] * scalar_;
1718  }
1719  //**********************************************************************************************
1720 
1721  //**Size function*******************************************************************************
1726  inline size_t size() const {
1727  return vector_.size();
1728  }
1729  //**********************************************************************************************
1730 
1731  //**Left operand access*************************************************************************
1736  inline LeftOperand leftOperand() const {
1737  return vector_;
1738  }
1739  //**********************************************************************************************
1740 
1741  //**Right operand access************************************************************************
1746  inline RightOperand rightOperand() const {
1747  return scalar_;
1748  }
1749  //**********************************************************************************************
1750 
1751  //**********************************************************************************************
1757  template< typename T >
1758  inline bool canAlias( const T* alias ) const {
1759  return vector_.canAlias( alias );
1760  }
1761  //**********************************************************************************************
1762 
1763  //**********************************************************************************************
1769  template< typename T >
1770  inline bool isAliased( const T* alias ) const {
1771  return vector_.isAliased( alias );
1772  }
1773  //**********************************************************************************************
1774 
1775  private:
1776  //**Member variables****************************************************************************
1777  LeftOperand vector_;
1778  RightOperand scalar_;
1779  //**********************************************************************************************
1780 
1781  //**Assignment to dense vectors*****************************************************************
1793  template< typename VT1 > // Type of the target dense vector
1794  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1795  {
1797 
1798  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1799 
1800  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1801  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1802 
1803  if( left.rows() == 0UL ) {
1804  return;
1805  }
1806  else if( left.columns() == 0UL ) {
1807  reset( ~lhs );
1808  return;
1809  }
1810 
1811  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1812  RT x( right ); // Evaluation of the right-hand side dense vector operand
1813 
1814  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1815  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1816  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1817  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1818 
1819  if( ( IsComputation<MT>::value && !evaluate ) ||
1820  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1821  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1822  else
1823  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1824  }
1825  //**********************************************************************************************
1826 
1827  //**Default assignment to dense vectors*********************************************************
1841  template< typename VT1 // Type of the left-hand side target vector
1842  , typename MT1 // Type of the left-hand side matrix operand
1843  , typename VT2 // Type of the right-hand side vector operand
1844  , typename ST2 > // Type of the scalar value
1845  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1846  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1847  {
1848  const size_t M( A.rows() );
1849  const size_t N( A.columns() );
1850 
1851  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1852  const size_t iend( M & size_t(-2) );
1853 
1854  for( size_t i=0UL; i<M; ++i ) {
1855  y[i] = x[0UL] * A(i,0UL);
1856  }
1857  for( size_t j=1UL; j<N; ++j ) {
1858  for( size_t i=0UL; i<iend; i+=2UL ) {
1859  y[i ] += x[j] * A(i ,j);
1860  y[i+1UL] += x[j] * A(i+1UL,j);
1861  }
1862  if( iend < M ) {
1863  y[iend] += x[j] * A(iend,j);
1864  }
1865  }
1866  for( size_t i=0UL; i<M; ++i ) {
1867  y[i] *= scalar;
1868  }
1869  }
1870  //**********************************************************************************************
1871 
1872  //**Vectorized default assignment to dense vectors**********************************************
1886  template< typename VT1 // Type of the left-hand side target vector
1887  , typename MT1 // Type of the left-hand side matrix operand
1888  , typename VT2 // Type of the right-hand side vector operand
1889  , typename ST2 > // Type of the scalar value
1890  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1891  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1892  {
1893  typedef IntrinsicTrait<ElementType> IT;
1894 
1895  const size_t M( A.spacing() );
1896  const size_t N( A.columns() );
1897 
1898  const IntrinsicType factor( set( scalar ) );
1899 
1900  size_t i( 0UL );
1901 
1902  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1903  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1904  for( size_t j=0UL; j<N; ++j ) {
1905  const IntrinsicType x1( set( x[j] ) );
1906  xmm1 = xmm1 + A.get(i ,j) * x1;
1907  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1908  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1909  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1910  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
1911  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
1912  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
1913  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
1914  }
1915  store( &y[i ], xmm1*factor );
1916  store( &y[i+IT::size ], xmm2*factor );
1917  store( &y[i+IT::size*2UL], xmm3*factor );
1918  store( &y[i+IT::size*3UL], xmm4*factor );
1919  store( &y[i+IT::size*4UL], xmm5*factor );
1920  store( &y[i+IT::size*5UL], xmm6*factor );
1921  store( &y[i+IT::size*6UL], xmm7*factor );
1922  store( &y[i+IT::size*7UL], xmm8*factor );
1923  }
1924  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1925  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1926  for( size_t j=0UL; j<N; ++j ) {
1927  const IntrinsicType x1( set( x[j] ) );
1928  xmm1 = xmm1 + A.get(i ,j) * x1;
1929  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1930  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1931  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1932  }
1933  store( &y[i ], xmm1*factor );
1934  store( &y[i+IT::size ], xmm2*factor );
1935  store( &y[i+IT::size*2UL], xmm3*factor );
1936  store( &y[i+IT::size*3UL], xmm4*factor );
1937  }
1938  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
1939  IntrinsicType xmm1, xmm2, xmm3;
1940  for( size_t j=0UL; j<N; ++j ) {
1941  const IntrinsicType x1( set( x[j] ) );
1942  xmm1 = xmm1 + A.get(i ,j) * x1;
1943  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1944  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1945  }
1946  store( &y[i ], xmm1*factor );
1947  store( &y[i+IT::size ], xmm2*factor );
1948  store( &y[i+IT::size*2UL], xmm3*factor );
1949  }
1950  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1951  IntrinsicType xmm1, xmm2;
1952  for( size_t j=0UL; j<N; ++j ) {
1953  const IntrinsicType x1( set( x[j] ) );
1954  xmm1 = xmm1 + A.get(i ,j) * x1;
1955  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
1956  }
1957  store( &y[i ], xmm1*factor );
1958  store( &y[i+IT::size], xmm2*factor );
1959  }
1960  if( i < M ) {
1961  IntrinsicType xmm1;
1962  for( size_t j=0UL; j<N; ++j ) {
1963  const IntrinsicType x1( set( x[j] ) );
1964  xmm1 = xmm1 + A.get(i,j) * x1;
1965  }
1966  store( &y[i], xmm1*factor );
1967  }
1968  }
1969  //**********************************************************************************************
1970 
1971  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel, fallback overload. Selected via EnableIf when
// UseDefaultKernel<VT1,MT1,VT2,ST2> holds. It performs no BLAS call itself and
// forwards all four arguments unchanged to selectDefaultAssignKernel().
//   y      : target vector
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
1985  template< typename VT1 // Type of the left-hand side target vector
1986  , typename MT1 // Type of the left-hand side matrix operand
1987  , typename VT2 // Type of the right-hand side vector operand
1988  , typename ST2 > // Type of the scalar value
1989  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1990  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1991  {
1992  selectDefaultAssignKernel( y, A, x, scalar );
1993  }
1994  //**********************************************************************************************
1995 
1996  //**BLAS-based assignment to dense vectors (single precision)***********************************
1997 #if BLAZE_BLAS_MODE
1998 
// BLAS assignment kernel for single precision operands. Selected via EnableIf
// when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds; the constraints in the
// body additionally require float element types for y, A and x.
// Issues a single cblas_sgemv call (CblasColMajor, CblasNoTrans) with
// alpha = scalar and beta = 0.0F on A.data(), x.data() and y.data().
//   y      : target vector (written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor passed as alpha
2011  template< typename VT1 // Type of the left-hand side target vector
2012  , typename MT1 // Type of the left-hand side matrix operand
2013  , typename VT2 // Type of the right-hand side vector operand
2014  , typename ST2 > // Type of the scalar value
2015  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2016  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2017  {
2018  using boost::numeric_cast;
2019 
2020  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2021  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2022  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2023 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2024  const int M ( numeric_cast<int>( A.rows() ) );
2025  const int N ( numeric_cast<int>( A.columns() ) );
2026  const int lda( numeric_cast<int>( A.spacing() ) );
2027 
      // beta = 0.0F: under the gemv contract (y := alpha*A*x + beta*y) the previous
      // content of y does not contribute to the result.
2028  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2029  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2030  }
2031 #endif
2032  //**********************************************************************************************
2033 
2034  //**BLAS-based assignment to dense vectors (double precision)***********************************
2035 #if BLAZE_BLAS_MODE
2036 
// BLAS assignment kernel for double precision operands. Selected via EnableIf
// when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds; the constraints in the
// body additionally require double element types for y, A and x.
// Issues a single cblas_dgemv call (CblasColMajor, CblasNoTrans) with
// alpha = scalar and beta = 0.0 on A.data(), x.data() and y.data().
//   y      : target vector (written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor passed as alpha
2049  template< typename VT1 // Type of the left-hand side target vector
2050  , typename MT1 // Type of the left-hand side matrix operand
2051  , typename VT2 // Type of the right-hand side vector operand
2052  , typename ST2 > // Type of the scalar value
2053  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2054  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2055  {
2056  using boost::numeric_cast;
2057 
2058  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2059  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2060  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2061 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2062  const int M ( numeric_cast<int>( A.rows() ) );
2063  const int N ( numeric_cast<int>( A.columns() ) );
2064  const int lda( numeric_cast<int>( A.spacing() ) );
2065 
      // beta = 0.0: under the gemv contract (y := alpha*A*x + beta*y) the previous
      // content of y does not contribute to the result.
2066  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2067  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2068  }
2069 #endif
2070  //**********************************************************************************************
2071 
2072  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2073 #if BLAZE_BLAS_MODE
2074 
// BLAS assignment kernel for single precision complex operands. Selected via
// EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds (the scalar
// type ST2 is not part of that condition); the constraints in the body require
// complex element types with a float value_type for y, A and x.
// Issues a single cblas_cgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<float>(scalar) and beta = (0,0).
//   y      : target vector (written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor, converted to complex<float> for alpha
2087  template< typename VT1 // Type of the left-hand side target vector
2088  , typename MT1 // Type of the left-hand side matrix operand
2089  , typename VT2 // Type of the right-hand side vector operand
2090  , typename ST2 > // Type of the scalar value
2091  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2092  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2093  {
2094  using boost::numeric_cast;
2095 
2096  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2097  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2098  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2100  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2101  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2102  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2103 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2104  const int M ( numeric_cast<int>( A.rows() ) );
2105  const int N ( numeric_cast<int>( A.columns() ) );
2106  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
2107  const complex<float> alpha( scalar );
2108  const complex<float> beta ( 0.0F, 0.0F );
2109 
2110  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2111  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2112  }
2113 #endif
2114  //**********************************************************************************************
2115 
2116  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2117 #if BLAZE_BLAS_MODE
2118 
// BLAS assignment kernel for double precision complex operands. Selected via
// EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds (the scalar
// type ST2 is not part of that condition); the constraints in the body require
// complex element types with a double value_type for y, A and x.
// Issues a single cblas_zgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<double>(scalar) and beta = (0,0).
//   y      : target vector (written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor, converted to complex<double> for alpha
2131  template< typename VT1 // Type of the left-hand side target vector
2132  , typename MT1 // Type of the left-hand side matrix operand
2133  , typename VT2 // Type of the right-hand side vector operand
2134  , typename ST2 > // Type of the scalar value
2135  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2136  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2137  {
2138  using boost::numeric_cast;
2139 
2140  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2141  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2142  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2144  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2145  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2146  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2147 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2148  const int M ( numeric_cast<int>( A.rows() ) );
2149  const int N ( numeric_cast<int>( A.columns() ) );
2150  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
2151  const complex<double> alpha( scalar );
2152  const complex<double> beta ( 0.0, 0.0 );
2153 
2154  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2155  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2156  }
2157 #endif
2158  //**********************************************************************************************
2159 
2160  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product expression to a sparse vector.
// No dedicated kernel is used here: the expression rhs is first evaluated into
// a temporary of type ResultType, and that temporary is then handed to the
// assign() overload for (~lhs, tmp).
//   lhs : target sparse vector
//   rhs : expression to be assigned; its size must match lhs (internal assert)
2172  template< typename VT1 > // Type of the target sparse vector
2173  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2174  {
2176 
2179  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2180 
2181  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2182 
2183  const ResultType tmp( rhs );
2184  assign( ~lhs, tmp );
2185  }
2186  //**********************************************************************************************
2187 
2188  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product expression to a
// dense vector. Extracts the matrix and vector operands from rhs.vector_,
// evaluates them into LT/RT objects and dispatches to either the default or
// the BLAS addition assignment kernel, passing rhs.scalar_ as scaling factor.
//   lhs : target dense vector; its size must match rhs (internal assert)
//   rhs : expression to be added
2200  template< typename VT1 > // Type of the target dense vector
2201  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2202  {
2204 
2205  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2206 
2207  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2208  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2209 
      // Nothing to add when the matrix has no rows or no columns.
2210  if( left.rows() == 0UL || left.columns() == 0UL ) {
2211  return;
2212  }
2213 
2214  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2215  RT x( right ); // Evaluation of the right-hand side dense vector operand
2216 
2217  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2218  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2219  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2220  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2221 
      // The default kernel is chosen when MT is a computation and 'evaluate' is false,
      // or when the number of matrix elements is below TDMATDVECMULT_THRESHOLD;
      // in all other cases the BLAS kernel is selected.
2222  if( ( IsComputation<MT>::value && !evaluate ) ||
2223  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2224  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2225  else
2226  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2227  }
2228  //**********************************************************************************************
2229 
2230  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel. Selected via DisableIf,
// i.e. whenever UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does NOT hold.
// Delegates the whole operation to y.addAssign() with the expression A * x * scalar.
//   y      : target vector
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2244  template< typename VT1 // Type of the left-hand side target vector
2245  , typename MT1 // Type of the left-hand side matrix operand
2246  , typename VT2 // Type of the right-hand side vector operand
2247  , typename ST2 > // Type of the scalar value
2248  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2249  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2250  {
2251  y.addAssign( A * x * scalar );
2252  }
2253  //**********************************************************************************************
2254 
2255  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel. Selected via EnableIf when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (the DisableIf overload of
// the same name covers the complementary case).
//
// Walks the index range [0, A.spacing()) in strides of 8, 4, 3, 2 and finally 1
// packed vector(s) of IT::size elements. For each stride it accumulates, over
// all columns j, A.get(i + k*IT::size, j) * set( x[j] ) into one accumulator per
// packed vector and then stores load( &y[..] ) + accumulator*factor back to y.
//   y      : target vector (read and written via load()/store())
//   A      : matrix operand (read via A.get())
//   x      : vector operand (read element-wise)
//   scalar : scaling factor, packed once into 'factor'
//
// NOTE(review): the xmm accumulators are only default-constructed before being
// summed into; this presumably relies on IntrinsicType default-initializing to
// zero -- confirm in the intrinsics headers.
// NOTE(review): the row bound is A.spacing(), not A.rows(); this presumably
// relies on A and y being padded up to the spacing -- confirm against the
// storage layout of the operand types.
2269  template< typename VT1 // Type of the left-hand side target vector
2270  , typename MT1 // Type of the left-hand side matrix operand
2271  , typename VT2 // Type of the right-hand side vector operand
2272  , typename ST2 > // Type of the scalar value
2273  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2274  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2275  {
2276  typedef IntrinsicTrait<ElementType> IT;
2277 
2278  const size_t M( A.spacing() );
2279  const size_t N( A.columns() );
2280 
2281  const IntrinsicType factor( set( scalar ) );
2282 
2283  size_t i( 0UL );
2284 
      // Stride of 8 packed vectors.
2285  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2286  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2287  for( size_t j=0UL; j<N; ++j ) {
2288  const IntrinsicType x1( set( x[j] ) );
2289  xmm1 = xmm1 + A.get(i ,j) * x1;
2290  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2291  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2292  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2293  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2294  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2295  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2296  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2297  }
2298  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2299  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2300  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2301  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2302  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) + xmm5*factor );
2303  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) + xmm6*factor );
2304  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) + xmm7*factor );
2305  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) + xmm8*factor );
2306  }
      // Stride of 4 packed vectors.
2307  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2308  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2309  for( size_t j=0UL; j<N; ++j ) {
2310  const IntrinsicType x1( set( x[j] ) );
2311  xmm1 = xmm1 + A.get(i ,j) * x1;
2312  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2313  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2314  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2315  }
2316  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2317  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2318  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2319  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2320  }
      // Stride of 3 packed vectors.
2321  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
2322  IntrinsicType xmm1, xmm2, xmm3;
2323  for( size_t j=0UL; j<N; ++j ) {
2324  const IntrinsicType x1( set( x[j] ) );
2325  xmm1 = xmm1 + A.get(i ,j) * x1;
2326  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2327  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2328  }
2329  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2330  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2331  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2332  }
      // Stride of 2 packed vectors.
2333  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2334  IntrinsicType xmm1, xmm2;
2335  for( size_t j=0UL; j<N; ++j ) {
2336  const IntrinsicType x1( set( x[j] ) );
2337  xmm1 = xmm1 + A.get(i ,j) * x1;
2338  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2339  }
2340  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2341  store( &y[i+IT::size], load( &y[i+IT::size] ) + xmm2*factor );
2342  }
      // Remainder: exactly one more packed vector is processed at index i.
      // NOTE(review): assumes M is a multiple of IT::size so that a single
      // packed vector covers what is left -- confirm.
2343  if( i < M ) {
2344  IntrinsicType xmm1;
2345  for( size_t j=0UL; j<N; ++j ) {
2346  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2347  }
2348  store( &y[i], load( &y[i] ) + xmm1*factor );
2349  }
2350  }
2351  //**********************************************************************************************
2352 
2353  //**BLAS-based addition assignment to dense vectors (default)***********************************
// BLAS addition assignment kernel, fallback overload. Selected via EnableIf
// when UseDefaultKernel<VT1,MT1,VT2,ST2> holds. It performs no BLAS call itself
// and forwards all four arguments unchanged to selectDefaultAddAssignKernel().
//   y      : target vector
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2367  template< typename VT1 // Type of the left-hand side target vector
2368  , typename MT1 // Type of the left-hand side matrix operand
2369  , typename VT2 // Type of the right-hand side vector operand
2370  , typename ST2 > // Type of the scalar value
2371  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2372  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2373  {
2374  selectDefaultAddAssignKernel( y, A, x, scalar );
2375  }
2376  //**********************************************************************************************
2377 
2378  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2379 #if BLAZE_BLAS_MODE
2380 
// BLAS addition assignment kernel for single precision operands. Selected via
// EnableIf when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds; the constraints
// in the body additionally require float element types for y, A and x.
// Issues a single cblas_sgemv call (CblasColMajor, CblasNoTrans) with
// alpha = scalar and beta = 1.0F on A.data(), x.data() and y.data().
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor passed as alpha
2393  template< typename VT1 // Type of the left-hand side target vector
2394  , typename MT1 // Type of the left-hand side matrix operand
2395  , typename VT2 // Type of the right-hand side vector operand
2396  , typename ST2 > // Type of the scalar value
2397  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2398  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2399  {
2400  using boost::numeric_cast;
2401 
2402  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2403  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2404  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2405 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2406  const int M ( numeric_cast<int>( A.rows() ) );
2407  const int N ( numeric_cast<int>( A.columns() ) );
2408  const int lda( numeric_cast<int>( A.spacing() ) );
2409 
      // beta = 1.0F: under the gemv contract (y := alpha*A*x + beta*y) the product
      // is accumulated onto the existing content of y.
2410  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2411  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2412  }
2413 #endif
2414  //**********************************************************************************************
2415 
2416  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2417 #if BLAZE_BLAS_MODE
2418 
// BLAS addition assignment kernel for double precision operands. Selected via
// EnableIf when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds; the constraints
// in the body additionally require double element types for y, A and x.
// Issues a single cblas_dgemv call (CblasColMajor, CblasNoTrans) with
// alpha = scalar and beta = 1.0 on A.data(), x.data() and y.data().
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor passed as alpha
2431  template< typename VT1 // Type of the left-hand side target vector
2432  , typename MT1 // Type of the left-hand side matrix operand
2433  , typename VT2 // Type of the right-hand side vector operand
2434  , typename ST2 > // Type of the scalar value
2435  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2436  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2437  {
2438  using boost::numeric_cast;
2439 
2440  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2441  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2442  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2443 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2444  const int M ( numeric_cast<int>( A.rows() ) );
2445  const int N ( numeric_cast<int>( A.columns() ) );
2446  const int lda( numeric_cast<int>( A.spacing() ) );
2447 
      // beta = 1.0: under the gemv contract (y := alpha*A*x + beta*y) the product
      // is accumulated onto the existing content of y.
2448  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2449  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2450  }
2451 #endif
2452  //**********************************************************************************************
2453 
2454  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2455 #if BLAZE_BLAS_MODE
2456 
// BLAS addition assignment kernel for single precision complex operands.
// Selected via EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition); the constraints in the
// body require complex element types with a float value_type for y, A and x.
// Issues a single cblas_cgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<float>(scalar) and beta = (1,0).
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor, converted to complex<float> for alpha
2469  template< typename VT1 // Type of the left-hand side target vector
2470  , typename MT1 // Type of the left-hand side matrix operand
2471  , typename VT2 // Type of the right-hand side vector operand
2472  , typename ST2 > // Type of the scalar value
2473  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2474  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2475  {
2476  using boost::numeric_cast;
2477 
2478  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2479  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2480  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2483  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2484  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2485 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2486  const int M ( numeric_cast<int>( A.rows() ) );
2487  const int N ( numeric_cast<int>( A.columns() ) );
2488  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the existing content of y in the result.
2489  const complex<float> alpha( scalar );
2490  const complex<float> beta ( 1.0F, 0.0F );
2491 
2492  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2493  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2494  }
2495 #endif
2496  //**********************************************************************************************
2497 
2498  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2499 #if BLAZE_BLAS_MODE
2500 
// BLAS addition assignment kernel for double precision complex operands.
// Selected via EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition); the constraints in the
// body require complex element types with a double value_type for y, A and x.
// Issues a single cblas_zgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<double>(scalar) and beta = (1,0).
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor, converted to complex<double> for alpha
2513  template< typename VT1 // Type of the left-hand side target vector
2514  , typename MT1 // Type of the left-hand side matrix operand
2515  , typename VT2 // Type of the right-hand side vector operand
2516  , typename ST2 > // Type of the scalar value
2517  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2518  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2519  {
2520  using boost::numeric_cast;
2521 
2522  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2523  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2524  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2526  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2527  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2528  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2529 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2530  const int M ( numeric_cast<int>( A.rows() ) );
2531  const int N ( numeric_cast<int>( A.columns() ) );
2532  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the existing content of y in the result.
2533  const complex<double> alpha( scalar );
2534  const complex<double> beta ( 1.0, 0.0 );
2535 
2536  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2537  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2538  }
2539 #endif
2540  //**********************************************************************************************
2541 
2542  //**Addition assignment to sparse vectors*******************************************************
2543  // No special implementation for the addition assignment to sparse vectors.
2544  //**********************************************************************************************
2545 
2546  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product expression to a
// dense vector. Extracts the matrix and vector operands from rhs.vector_,
// evaluates them into LT/RT objects and dispatches to either the default or
// the BLAS subtraction assignment kernel, passing rhs.scalar_ as scaling factor.
//   lhs : target dense vector; its size must match rhs (internal assert)
//   rhs : expression to be subtracted
2558  template< typename VT1 > // Type of the target dense vector
2559  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2560  {
2562 
2563  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2564 
2565  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2566  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2567 
      // Nothing to subtract when the matrix has no rows or no columns.
2568  if( left.rows() == 0UL || left.columns() == 0UL ) {
2569  return;
2570  }
2571 
2572  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2573  RT x( right ); // Evaluation of the right-hand side dense vector operand
2574 
2575  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2576  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2577  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2578  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2579 
      // The default kernel is chosen when MT is a computation and 'evaluate' is false,
      // or when the number of matrix elements is below TDMATDVECMULT_THRESHOLD;
      // in all other cases the BLAS kernel is selected.
2580  if( ( IsComputation<MT>::value && !evaluate ) ||
2581  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2582  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2583  else
2584  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2585  }
2586  //**********************************************************************************************
2587 
2588  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel. Selected via
// DisableIf, i.e. whenever UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does NOT
// hold. Delegates the whole operation to y.subAssign() with the expression
// A * x * scalar.
//   y      : target vector
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2602  template< typename VT1 // Type of the left-hand side target vector
2603  , typename MT1 // Type of the left-hand side matrix operand
2604  , typename VT2 // Type of the right-hand side vector operand
2605  , typename ST2 > // Type of the scalar value
2606  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2607  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2608  {
2609  y.subAssign( A * x * scalar );
2610  }
2611  //**********************************************************************************************
2612 
2613  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel. Selected via EnableIf when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (the DisableIf overload of
// the same name covers the complementary case).
//
// Walks the index range [0, A.spacing()) in strides of 8, 4, 3, 2 and finally 1
// packed vector(s) of IT::size elements. For each stride it accumulates, over
// all columns j, A.get(i + k*IT::size, j) * set( x[j] ) into one accumulator per
// packed vector and then stores load( &y[..] ) - accumulator*factor back to y.
//   y      : target vector (read and written via load()/store())
//   A      : matrix operand (read via A.get())
//   x      : vector operand (read element-wise)
//   scalar : scaling factor, packed once into 'factor'
//
// NOTE(review): the xmm accumulators are only default-constructed before being
// summed into; this presumably relies on IntrinsicType default-initializing to
// zero -- confirm in the intrinsics headers.
// NOTE(review): the row bound is A.spacing(), not A.rows(); this presumably
// relies on A and y being padded up to the spacing -- confirm against the
// storage layout of the operand types.
2627  template< typename VT1 // Type of the left-hand side target vector
2628  , typename MT1 // Type of the left-hand side matrix operand
2629  , typename VT2 // Type of the right-hand side vector operand
2630  , typename ST2 > // Type of the scalar value
2631  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2632  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2633  {
2634  typedef IntrinsicTrait<ElementType> IT;
2635 
2636  const size_t M( A.spacing() );
2637  const size_t N( A.columns() );
2638 
2639  const IntrinsicType factor( set( scalar ) );
2640 
2641  size_t i( 0UL );
2642 
      // Stride of 8 packed vectors.
2643  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2644  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2645  for( size_t j=0UL; j<N; ++j ) {
2646  const IntrinsicType x1( set( x[j] ) );
2647  xmm1 = xmm1 + A.get(i ,j) * x1;
2648  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2649  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2650  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2651  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2652  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2653  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2654  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2655  }
2656  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2657  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2658  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2659  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2660  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) - xmm5*factor );
2661  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) - xmm6*factor );
2662  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) - xmm7*factor );
2663  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) - xmm8*factor );
2664  }
      // Stride of 4 packed vectors.
2665  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2666  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2667  for( size_t j=0UL; j<N; ++j ) {
2668  const IntrinsicType x1( set( x[j] ) );
2669  xmm1 = xmm1 + A.get(i ,j) * x1;
2670  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2671  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2672  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2673  }
2674  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2675  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2676  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2677  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2678  }
      // Stride of 3 packed vectors.
2679  for( ; (i+IT::size*3UL) <= M; i+=IT::size*3UL ) {
2680  IntrinsicType xmm1, xmm2, xmm3;
2681  for( size_t j=0UL; j<N; ++j ) {
2682  const IntrinsicType x1( set( x[j] ) );
2683  xmm1 = xmm1 + A.get(i ,j) * x1;
2684  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2685  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2686  }
2687  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2688  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2689  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2690  }
      // Stride of 2 packed vectors.
2691  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2692  IntrinsicType xmm1, xmm2;
2693  for( size_t j=0UL; j<N; ++j ) {
2694  const IntrinsicType x1( set( x[j] ) );
2695  xmm1 = xmm1 + A.get(i ,j) * x1;
2696  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2697  }
2698  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2699  store( &y[i+IT::size], load( &y[i+IT::size] ) - xmm2*factor );
2700  }
      // Remainder: exactly one more packed vector is processed at index i.
      // NOTE(review): assumes M is a multiple of IT::size so that a single
      // packed vector covers what is left -- confirm.
2701  if( i < M ) {
2702  IntrinsicType xmm1;
2703  for( size_t j=0UL; j<N; ++j ) {
2704  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2705  }
2706  store( &y[i], load( &y[i] ) - xmm1*factor );
2707  }
2708  }
2709  //**********************************************************************************************
2710 
2711  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// BLAS subtraction assignment kernel, fallback overload. Selected via EnableIf
// when UseDefaultKernel<VT1,MT1,VT2,ST2> holds. It performs no BLAS call itself
// and forwards all four arguments unchanged to selectDefaultSubAssignKernel().
//   y      : target vector
//   A      : matrix operand
//   x      : vector operand
//   scalar : scaling factor
2725  template< typename VT1 // Type of the left-hand side target vector
2726  , typename MT1 // Type of the left-hand side matrix operand
2727  , typename VT2 // Type of the right-hand side vector operand
2728  , typename ST2 > // Type of the scalar value
2729  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2730  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2731  {
2732  selectDefaultSubAssignKernel( y, A, x, scalar );
2733  }
2734  //**********************************************************************************************
2735 
2736  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2737 #if BLAZE_BLAS_MODE
2738 
// BLAS subtraction assignment kernel for single precision operands. Selected
// via EnableIf when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds; the
// constraints in the body additionally require float element types for y, A
// and x. Issues a single cblas_sgemv call (CblasColMajor, CblasNoTrans) with
// alpha = -scalar and beta = 1.0F on A.data(), x.data() and y.data().
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor; its negation is passed as alpha
2751  template< typename VT1 // Type of the left-hand side target vector
2752  , typename MT1 // Type of the left-hand side matrix operand
2753  , typename VT2 // Type of the right-hand side vector operand
2754  , typename ST2 > // Type of the scalar value
2755  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2756  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2757  {
2758  using boost::numeric_cast;
2759 
2760  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2762  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2763 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2764  const int M ( numeric_cast<int>( A.rows() ) );
2765  const int N ( numeric_cast<int>( A.columns() ) );
2766  const int lda( numeric_cast<int>( A.spacing() ) );
2767 
      // Subtraction is expressed through gemv (y := alpha*A*x + beta*y) by negating
      // the scalar and keeping y with beta = 1.0F.
2768  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2769  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2770  }
2771 #endif
2772  //**********************************************************************************************
2773 
2774  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2775 #if BLAZE_BLAS_MODE
2776 
// BLAS subtraction assignment kernel for double precision operands. Selected
// via EnableIf when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds; the
// constraints in the body additionally require double element types for y, A
// and x. Issues a single cblas_dgemv call (CblasColMajor, CblasNoTrans) with
// alpha = -scalar and beta = 1.0 on A.data(), x.data() and y.data().
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor; its negation is passed as alpha
2789  template< typename VT1 // Type of the left-hand side target vector
2790  , typename MT1 // Type of the left-hand side matrix operand
2791  , typename VT2 // Type of the right-hand side vector operand
2792  , typename ST2 > // Type of the scalar value
2793  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2794  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2795  {
2796  using boost::numeric_cast;
2797 
2798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2800  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2801 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2802  const int M ( numeric_cast<int>( A.rows() ) );
2803  const int N ( numeric_cast<int>( A.columns() ) );
2804  const int lda( numeric_cast<int>( A.spacing() ) );
2805 
      // Subtraction is expressed through gemv (y := alpha*A*x + beta*y) by negating
      // the scalar and keeping y with beta = 1.0.
2806  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2807  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2808  }
2809 #endif
2810  //**********************************************************************************************
2811 
2812  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2813 #if BLAZE_BLAS_MODE
2814 
// BLAS subtraction assignment kernel for single precision complex operands.
// Selected via EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition); the constraints in the
// body require complex element types with a float value_type for y, A and x.
// Issues a single cblas_cgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<float>(-scalar) and beta = (1,0).
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor; its negation is converted to complex<float> for alpha
2827  template< typename VT1 // Type of the left-hand side target vector
2828  , typename MT1 // Type of the left-hand side matrix operand
2829  , typename VT2 // Type of the right-hand side vector operand
2830  , typename ST2 > // Type of the scalar value
2831  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2832  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2833  {
2834  using boost::numeric_cast;
2835 
2836  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2837  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2838  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2840  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2841  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2842  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2843 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2844  const int M ( numeric_cast<int>( A.rows() ) );
2845  const int N ( numeric_cast<int>( A.columns() ) );
2846  const int lda( numeric_cast<int>( A.spacing() ) );
      // Subtraction is expressed through gemv by negating the scalar (alpha) and
      // keeping y with beta = (1,0); both are passed by address.
2847  const complex<float> alpha( -scalar );
2848  const complex<float> beta ( 1.0F, 0.0F );
2849 
2850  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2851  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2852  }
2853 #endif
2854  //**********************************************************************************************
2855 
2856  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2857 #if BLAZE_BLAS_MODE
2858 
// BLAS subtraction assignment kernel for double precision complex operands.
// Selected via EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition); the constraints in the
// body require complex element types with a double value_type for y, A and x.
// Issues a single cblas_zgemv call (CblasColMajor, CblasNoTrans) with
// alpha = complex<double>(-scalar) and beta = (1,0).
//   y      : target vector (read and written through y.data())
//   A      : matrix operand (read through A.data(), leading dimension A.spacing())
//   x      : vector operand (read through x.data())
//   scalar : scaling factor; its negation is converted to complex<double> for alpha
2871  template< typename VT1 // Type of the left-hand side target vector
2872  , typename MT1 // Type of the left-hand side matrix operand
2873  , typename VT2 // Type of the right-hand side vector operand
2874  , typename ST2 > // Type of the scalar value
2875  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2876  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2877  {
2878  using boost::numeric_cast;
2879 
2880  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2881  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2882  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2884  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2885  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2886  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2887 
      // The CBLAS interface takes int dimensions; the sizes are converted via numeric_cast.
2888  const int M ( numeric_cast<int>( A.rows() ) );
2889  const int N ( numeric_cast<int>( A.columns() ) );
2890  const int lda( numeric_cast<int>( A.spacing() ) );
      // Subtraction is expressed through gemv by negating the scalar (alpha) and
      // keeping y with beta = (1,0); both are passed by address.
2891  const complex<double> alpha( -scalar );
2892  const complex<double> beta ( 1.0, 0.0 );
2893 
2894  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2895  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2896  }
2897 #endif
2898  //**********************************************************************************************
2899 
2900  //**Subtraction assignment to sparse vectors****************************************************
2901  // No special implementation for the subtraction assignment to sparse vectors.
2902  //**********************************************************************************************
2903 
2904  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a DVecScalarMultExpr expression to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side expression to be multiplied into lhs.
//
// The expression is not multiplied in directly: it is first evaluated into a temporary of
// type ResultType, and that temporary is then forwarded to the multAssign() overload that
// accepts the target vector and a ResultType operand.
2916  template< typename VT1 > // Type of the target dense vector
2917  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2918  {
// NOTE(review): this listing skips upstream lines 2919 and 2921-2922 around this point.
2920 
2923  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2924 
// Internal sanity check: target and expression must have the same number of elements.
2925  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2926 
2927  const ResultType tmp( rhs ); // Evaluate the expression once into a temporary
2928  multAssign( ~lhs, tmp ); // Delegate to the overload for the evaluated operand
2929  }
2930  //**********************************************************************************************
2931 
2932  //**Multiplication assignment to sparse vectors*************************************************
2933  // No special implementation for the multiplication assignment to sparse vectors.
2934  //**********************************************************************************************
2935 
2936  //**Compile time checks*************************************************************************
2944  //**********************************************************************************************
2945 };
2947 //*************************************************************************************************
2948 
2949 
2950 
2951 
2952 //=================================================================================================
2953 //
2954 // GLOBAL BINARY ARITHMETIC OPERATORS
2955 //
2956 //=================================================================================================
2957 
2958 //*************************************************************************************************
// Multiplication operator for a dense matrix and a dense vector (y = A * x).
//
// \param mat The left-hand side dense matrix for the multiplication.
// \param vec The right-hand side dense vector for the multiplication.
// \return A TDMatDVecMultExpr expression object built from the two operands.
// \exception std::invalid_argument Matrix and vector sizes do not match.
//
// The operator is removed from overload resolution (DisableIf) if T1 is itself a matrix/matrix
// multiplication expression. The number of matrix columns must equal the vector size; a
// mismatch is reported by throwing std::invalid_argument before the expression is created.
//
// NOTE(review): the declarator on upstream line 2992 was missing from this listing, which left
// the definition ill-formed. It has been restored from the parameter names used in the body;
// confirm the parameter types against the upstream header.
2989  template< typename T1 // Type of the left-hand side dense matrix
2990  , typename T2 > // Type of the right-hand side dense vector
2991  inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
2992  operator*( const DenseMatrix<T1,true>& mat, const DenseVector<T2,false>& vec )
2993  {
2995 
// Runtime size check: columns of the matrix versus elements of the vector.
2996  if( (~mat).columns() != (~vec).size() )
2997  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2998 
2999  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3000  }
3001 //*************************************************************************************************
3002 
3003 } // namespace blaze
3004 
3005 #endif