All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
41 #include <blaze/math/Intrinsics.h>
42 #include <blaze/math/shims/Reset.h>
48 #include <blaze/system/BLAS.h>
50 #include <blaze/util/Assert.h>
51 #include <blaze/util/Complex.h>
57 #include <blaze/util/DisableIf.h>
58 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/SelectType.h>
61 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // CLASS TDMATDVECMULTEXPR
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
84 template< typename MT // Type of the left-hand side dense matrix
85  , typename VT > // Type of the right-hand side dense vector
86 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
87  , private MatVecMultExpr
88  , private Computation
89 {
90  private:
91  //**Type definitions****************************************************************************
// Result type of the left-hand side matrix operand type MT.
92  typedef typename MT::ResultType MRT;
// Result type of the right-hand side vector operand type VT.
93  typedef typename VT::ResultType VRT;
// Element type of the matrix result type MRT.
94  typedef typename MRT::ElementType MET;
// Element type of the vector result type VRT.
95  typedef typename VRT::ElementType VET;
// Composite type of the left-hand side matrix operand type MT.
96  typedef typename MT::CompositeType MCT;
// Composite type of the right-hand side vector operand type VT.
97  typedef typename VT::CompositeType VCT;
98  //**********************************************************************************************
99 
100  //**********************************************************************************************
102  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
104  //**********************************************************************************************
105 
106  //**********************************************************************************************
108 
109 
112  template< typename T1, typename T2, typename T3 >
113  struct UseSinglePrecisionKernel {
114  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
118  };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124 
125 
128  template< typename T1, typename T2, typename T3 >
129  struct UseDoublePrecisionKernel {
130  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
131  IsDouble<typename T1::ElementType>::value &&
132  IsDouble<typename T2::ElementType>::value &&
133  IsDouble<typename T3::ElementType>::value };
134  };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
141 
144  template< typename T1, typename T2, typename T3 >
145  struct UseSinglePrecisionComplexKernel {
146  typedef complex<float> Type;
147  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
148  IsSame<typename T1::ElementType,Type>::value &&
149  IsSame<typename T2::ElementType,Type>::value &&
150  IsSame<typename T3::ElementType,Type>::value };
151  };
153  //**********************************************************************************************
154 
155  //**********************************************************************************************
157 
158 
161  template< typename T1, typename T2, typename T3 >
162  struct UseDoublePrecisionComplexKernel {
163  typedef complex<double> Type;
164  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
165  IsSame<typename T1::ElementType,Type>::value &&
166  IsSame<typename T2::ElementType,Type>::value &&
167  IsSame<typename T3::ElementType,Type>::value };
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
175 
177  template< typename T1, typename T2, typename T3 >
178  struct UseDefaultKernel {
179  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
180  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
181  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
182  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
183  };
185  //**********************************************************************************************
186 
187  //**********************************************************************************************
189 
190 
193  template< typename T1, typename T2, typename T3 >
194  struct UseVectorizedDefaultKernel {
195  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
196  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
197  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
198  IntrinsicTrait<typename T1::ElementType>::addition &&
199  IntrinsicTrait<typename T1::ElementType>::multiplication };
200  };
202  //**********************************************************************************************
203 
204  public:
205  //**Type definitions****************************************************************************
// NOTE(review): ResultType is used below but its typedef is not part of this listing
// (the original lines just before this one are absent) - confirm against the full header.
// Transpose type taken from ResultType.
208  typedef typename ResultType::TransposeType TransposeType;
// Element type taken from ResultType.
209  typedef typename ResultType::ElementType ElementType;
// Type returned by the subscript operator: a const element.
211  typedef const ElementType ReturnType;
// Composite type of this expression: a const ResultType.
212  typedef const ResultType CompositeType;
213 
// Storage type of the matrix operand, chosen through SelectType on IsExpression<MT>
// (presumably 'const MT' for expressions and 'const MT&' otherwise - verify SelectType's order).
215  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
216 
// Storage type of the vector operand, chosen through SelectType on IsExpression<VT>
// (presumably 'const VT' for expressions and 'const VT&' otherwise - verify SelectType's order).
218  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
219 
222 
// Type used for the vector operand inside the assignment kernels, chosen through SelectType on
// IsComputation<VT> (presumably the evaluated 'const VRT' for computations, VCT otherwise).
224  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
225  //**********************************************************************************************
226 
227  //**Compilation flags***************************************************************************
// Compilation flag read as T::vectorizable by the kernel selectors; this expression type
// sets it to 0, i.e. it does not report itself as vectorizable.
229  enum { vectorizable = 0 };
230  //**********************************************************************************************
231 
232  //**Constructor*********************************************************************************
238  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
239  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
240  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
241  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
242  {
243  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
244  }
245  //**********************************************************************************************
246 
247  //**Subscript operator**************************************************************************
253  inline ReturnType operator[]( size_t index ) const {
254  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
255 
256  ElementType res;
257 
258  if( mat_.columns() != 0UL ) {
259  res = mat_(index,0UL) * vec_[0UL];
260  for( size_t j=1UL; j<end_; j+=2UL ) {
261  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
262  }
263  if( end_ < mat_.columns() ) {
264  res += mat_(index,end_) * vec_[end_];
265  }
266  }
267  else {
268  reset( res );
269  }
270 
271  return res;
272  }
273  //**********************************************************************************************
274 
275  //**Size function*******************************************************************************
280  inline size_t size() const {
281  return mat_.rows();
282  }
283  //**********************************************************************************************
284 
285  //**Left function*******************************************************************************
290  inline LeftOperand leftOperand() const {
291  return mat_;
292  }
293  //**********************************************************************************************
294 
295  //**Right function******************************************************************************
300  inline RightOperand rightOperand() const {
301  return vec_;
302  }
303  //**********************************************************************************************
304 
305  //**********************************************************************************************
311  template< typename T >
312  inline bool canAlias( const T* alias ) const {
313  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
314  }
315  //**********************************************************************************************
316 
317  //**********************************************************************************************
323  template< typename T >
324  inline bool isAliased( const T* alias ) const {
325  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
326  }
327  //**********************************************************************************************
328 
329  private:
330  //**Member variables****************************************************************************
// End of the two-way unrolled loop in the subscript operator; set once in the constructor.
333  const size_t end_;
334  //**********************************************************************************************
335 
336  //**Assignment to dense vectors*****************************************************************
348  template< typename VT1 > // Type of the target dense vector
349  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
350  {
352 
353  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
354 
355  if( rhs.mat_.rows() == 0UL ) {
356  return;
357  }
358  else if( rhs.mat_.columns() == 0UL ) {
359  reset( ~lhs );
360  return;
361  }
362 
363  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
364  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
365 
366  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
367  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
368  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
369  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
370 
371  if( ( IsComputation<MT>::value && !evaluate ) ||
372  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
373  TDMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
374  else
375  TDMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
376  }
378  //**********************************************************************************************
379 
380  //**Default assignment to dense vectors*********************************************************
394  template< typename VT1 // Type of the left-hand side target vector
395  , typename MT1 // Type of the left-hand side matrix operand
396  , typename VT2 > // Type of the right-hand side vector operand
397  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
398  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
399  {
400  const size_t M( A.rows() );
401  const size_t N( A.columns() );
402 
403  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
404  const size_t iend( M & size_t(-2) );
405 
406  for( size_t i=0UL; i<M; ++i ) {
407  y[i] = x[0UL] * A(i,0UL);
408  }
409  for( size_t j=1UL; j<N; ++j ) {
410  for( size_t i=0UL; i<iend; i+=2UL ) {
411  y[i ] += x[j] * A(i ,j);
412  y[i+1UL] += x[j] * A(i+1UL,j);
413  }
414  if( iend < M ) {
415  y[iend] += x[j] * A(iend,j);
416  }
417  }
418  }
420  //**********************************************************************************************
421 
422  //**Vectorized default assignment to dense vectors**********************************************
436  template< typename VT1 // Type of the left-hand side target vector
437  , typename MT1 // Type of the left-hand side matrix operand
438  , typename VT2 > // Type of the right-hand side vector operand
439  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
440  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
441  {
442  typedef IntrinsicTrait<ElementType> IT;
443 
444  const size_t M( A.rows() );
445  const size_t N( A.columns() );
446 
447  size_t i( 0UL );
448 
449  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
450  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
451  for( size_t j=0UL; j<N; ++j ) {
452  const IntrinsicType x1( set( x[j] ) );
453  xmm1 = xmm1 + A.get(i ,j) * x1;
454  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
455  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
456  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
457  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
458  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
459  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
460  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
461  }
462  store( &y[i ], xmm1 );
463  store( &y[i+IT::size ], xmm2 );
464  store( &y[i+IT::size*2UL], xmm3 );
465  store( &y[i+IT::size*3UL], xmm4 );
466  store( &y[i+IT::size*4UL], xmm5 );
467  store( &y[i+IT::size*5UL], xmm6 );
468  store( &y[i+IT::size*6UL], xmm7 );
469  store( &y[i+IT::size*7UL], xmm8 );
470  }
471  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
472  IntrinsicType xmm1, xmm2, xmm3, xmm4;
473  for( size_t j=0UL; j<N; ++j ) {
474  const IntrinsicType x1( set( x[j] ) );
475  xmm1 = xmm1 + A.get(i ,j) * x1;
476  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
477  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
478  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
479  }
480  store( &y[i ], xmm1 );
481  store( &y[i+IT::size ], xmm2 );
482  store( &y[i+IT::size*2UL], xmm3 );
483  store( &y[i+IT::size*3UL], xmm4 );
484  }
485  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
486  IntrinsicType xmm1, xmm2, xmm3;
487  for( size_t j=0UL; j<N; ++j ) {
488  const IntrinsicType x1( set( x[j] ) );
489  xmm1 = xmm1 + A.get(i ,j) * x1;
490  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
491  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
492  }
493  store( &y[i ], xmm1 );
494  store( &y[i+IT::size ], xmm2 );
495  store( &y[i+IT::size*2UL], xmm3 );
496  }
497  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
498  IntrinsicType xmm1, xmm2;
499  for( size_t j=0UL; j<N; ++j ) {
500  const IntrinsicType x1( set( x[j] ) );
501  xmm1 = xmm1 + A.get(i ,j) * x1;
502  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
503  }
504  store( &y[i ], xmm1 );
505  store( &y[i+IT::size], xmm2 );
506  }
507  if( i < M ) {
508  IntrinsicType xmm1;
509  for( size_t j=0UL; j<N; ++j ) {
510  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
511  }
512  store( &y[i], xmm1 );
513  }
514  }
516  //**********************************************************************************************
517 
518  //**BLAS-based assignment to dense vectors (default)********************************************
532  template< typename VT1 // Type of the left-hand side target vector
533  , typename MT1 // Type of the left-hand side matrix operand
534  , typename VT2 > // Type of the right-hand side vector operand
535  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
536  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
537  {
538  selectDefaultAssignKernel( y, A, x );
539  }
541  //**********************************************************************************************
542 
543  //**BLAS-based assignment to dense vectors (single precision)***********************************
544 #if BLAZE_BLAS_MODE
545 
558  template< typename VT1 // Type of the left-hand side target vector
559  , typename MT1 // Type of the left-hand side matrix operand
560  , typename VT2 > // Type of the right-hand side vector operand
561  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
562  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
563  {
564  using boost::numeric_cast;
565 
566  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
567  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
568  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
569 
570  const int M ( numeric_cast<int>( A.rows() ) );
571  const int N ( numeric_cast<int>( A.columns() ) );
572  const int lda( numeric_cast<int>( A.spacing() ) );
573 
574  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
575  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
576  }
578 #endif
579  //**********************************************************************************************
580 
581  //**BLAS-based assignment to dense vectors (double precision)***********************************
582 #if BLAZE_BLAS_MODE
583 
596  template< typename VT1 // Type of the left-hand side target vector
597  , typename MT1 // Type of the left-hand side matrix operand
598  , typename VT2 > // Type of the right-hand side vector operand
599  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
600  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
601  {
602  using boost::numeric_cast;
603 
604  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
605  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
606  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
607 
608  const int M ( numeric_cast<int>( A.rows() ) );
609  const int N ( numeric_cast<int>( A.columns() ) );
610  const int lda( numeric_cast<int>( A.spacing() ) );
611 
612  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
613  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
614  }
616 #endif
617  //**********************************************************************************************
618 
619  //**BLAS-based assignment to dense vectors (single precision complex)***************************
620 #if BLAZE_BLAS_MODE
621 
634  template< typename VT1 // Type of the left-hand side target vector
635  , typename MT1 // Type of the left-hand side matrix operand
636  , typename VT2 > // Type of the right-hand side vector operand
637  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
638  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
639  {
640  using boost::numeric_cast;
641 
642  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
643  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
644  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
645  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
646  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
647  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
648 
649  const int M ( numeric_cast<int>( A.rows() ) );
650  const int N ( numeric_cast<int>( A.columns() ) );
651  const int lda( numeric_cast<int>( A.spacing() ) );
652  const complex<float> alpha( 1.0F, 0.0F );
653  const complex<float> beta ( 0.0F, 0.0F );
654 
655  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
656  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
657  }
659 #endif
660  //**********************************************************************************************
661 
662  //**BLAS-based assignment to dense vectors (double precision complex)***************************
663 #if BLAZE_BLAS_MODE
664 
677  template< typename VT1 // Type of the left-hand side target vector
678  , typename MT1 // Type of the left-hand side matrix operand
679  , typename VT2 > // Type of the right-hand side vector operand
680  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
681  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
682  {
683  using boost::numeric_cast;
684 
685  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
686  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
687  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
688  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
689  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
690  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
691 
692  const int M ( numeric_cast<int>( A.rows() ) );
693  const int N ( numeric_cast<int>( A.columns() ) );
694  const int lda( numeric_cast<int>( A.spacing() ) );
695  const complex<double> alpha( 1.0, 0.0 );
696  const complex<double> beta ( 0.0, 0.0 );
697 
698  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
699  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
700  }
702 #endif
703  //**********************************************************************************************
704 
705  //**Assignment to sparse vectors****************************************************************
717  template< typename VT1 > // Type of the target sparse vector
718  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
719  {
721 
724  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
725 
726  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
727 
728  const ResultType tmp( rhs );
729  assign( ~lhs, tmp );
730  }
732  //**********************************************************************************************
733 
734  //**Addition assignment to dense vectors********************************************************
747  template< typename VT1 > // Type of the target dense vector
748  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
749  {
751 
752  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
753 
754  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
755  return;
756  }
757 
758  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
759  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
760 
761  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
762  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
763  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
764  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
765 
766  if( ( IsComputation<MT>::value && !evaluate ) ||
767  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
768  TDMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
769  else
770  TDMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
771  }
773  //**********************************************************************************************
774 
775  //**Default addition assignment to dense vectors************************************************
789  template< typename VT1 // Type of the left-hand side target vector
790  , typename MT1 // Type of the left-hand side matrix operand
791  , typename VT2 > // Type of the right-hand side vector operand
792  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
793  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
794  {
795  const size_t M( A.rows() );
796  const size_t N( A.columns() );
797 
798  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
799  const size_t iend( M & size_t(-2) );
800 
801  for( size_t j=0UL; j<N; ++j ) {
802  for( size_t i=0UL; i<iend; i+=2UL ) {
803  y[i ] += x[j] * A(i ,j);
804  y[i+1UL] += x[j] * A(i+1UL,j);
805  }
806  if( iend < M ) {
807  y[iend] += x[j] * A(iend,j);
808  }
809  }
810  }
812  //**********************************************************************************************
813 
814  //**Vectorized default addition assignment to dense vectors*************************************
828  template< typename VT1 // Type of the left-hand side target vector
829  , typename MT1 // Type of the left-hand side matrix operand
830  , typename VT2 > // Type of the right-hand side vector operand
831  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
832  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
833  {
834  typedef IntrinsicTrait<ElementType> IT;
835 
836  const size_t M( A.rows() );
837  const size_t N( A.columns() );
838 
839  size_t i( 0UL );
840 
841  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
842  IntrinsicType xmm1( load( &y[i ] ) );
843  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
844  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
845  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
846  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
847  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
848  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
849  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
850  for( size_t j=0UL; j<N; ++j ) {
851  const IntrinsicType x1( set( x[j] ) );
852  xmm1 = xmm1 + A.get(i ,j) * x1;
853  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
854  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
855  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
856  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
857  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
858  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
859  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
860  }
861  store( &y[i ], xmm1 );
862  store( &y[i+IT::size ], xmm2 );
863  store( &y[i+IT::size*2UL], xmm3 );
864  store( &y[i+IT::size*3UL], xmm4 );
865  store( &y[i+IT::size*4UL], xmm5 );
866  store( &y[i+IT::size*5UL], xmm6 );
867  store( &y[i+IT::size*6UL], xmm7 );
868  store( &y[i+IT::size*7UL], xmm8 );
869  }
870  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
871  IntrinsicType xmm1( load( &y[i ] ) );
872  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
873  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
874  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
875  for( size_t j=0UL; j<N; ++j ) {
876  const IntrinsicType x1( set( x[j] ) );
877  xmm1 = xmm1 + A.get(i ,j) * x1;
878  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
879  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
880  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
881  }
882  store( &y[i ], xmm1 );
883  store( &y[i+IT::size ], xmm2 );
884  store( &y[i+IT::size*2UL], xmm3 );
885  store( &y[i+IT::size*3UL], xmm4 );
886  }
887  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
888  IntrinsicType xmm1( load( &y[i ] ) );
889  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
890  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
891  for( size_t j=0UL; j<N; ++j ) {
892  const IntrinsicType x1( set( x[j] ) );
893  xmm1 = xmm1 + A.get(i ,j) * x1;
894  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
895  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
896  }
897  store( &y[i ], xmm1 );
898  store( &y[i+IT::size ], xmm2 );
899  store( &y[i+IT::size*2UL], xmm3 );
900  }
901  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
902  IntrinsicType xmm1( load( &y[i ] ) );
903  IntrinsicType xmm2( load( &y[i+IT::size] ) );
904  for( size_t j=0UL; j<N; ++j ) {
905  const IntrinsicType x1( set( x[j] ) );
906  xmm1 = xmm1 + A.get(i ,j) * x1;
907  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
908  }
909  store( &y[i ], xmm1 );
910  store( &y[i+IT::size], xmm2 );
911  }
912  if( i < M ) {
913  IntrinsicType xmm1( load( &y[i] ) );
914  for( size_t j=0UL; j<N; ++j ) {
915  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
916  }
917  store( &y[i], xmm1 );
918  }
919  }
921  //**********************************************************************************************
922 
923  //**BLAS-based addition assignment to dense vectors (default)***********************************
937  template< typename VT1 // Type of the left-hand side target vector
938  , typename MT1 // Type of the left-hand side matrix operand
939  , typename VT2 > // Type of the right-hand side vector operand
940  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
941  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
942  {
943  selectDefaultAddAssignKernel( y, A, x );
944  }
946  //**********************************************************************************************
947 
948  //**BLAS-based addition assignment to dense vectors (single precision)**************************
949 #if BLAZE_BLAS_MODE
950 
963  template< typename VT1 // Type of the left-hand side target vector
964  , typename MT1 // Type of the left-hand side matrix operand
965  , typename VT2 > // Type of the right-hand side vector operand
966  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
967  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
968  {
969  using boost::numeric_cast;
970 
971  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
972  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
973  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
974 
975  const int M ( numeric_cast<int>( A.rows() ) );
976  const int N ( numeric_cast<int>( A.columns() ) );
977  const int lda( numeric_cast<int>( A.spacing() ) );
978 
979  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
980  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
981  }
983 #endif
984  //**********************************************************************************************
985 
986  //**BLAS-based addition assignment to dense vectors (double precision)**************************
987 #if BLAZE_BLAS_MODE
988 
1001  template< typename VT1 // Type of the left-hand side target vector
1002  , typename MT1 // Type of the left-hand side matrix operand
1003  , typename VT2 > // Type of the right-hand side vector operand
1004  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1005  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1006  {
1007  using boost::numeric_cast;
1008 
1009  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1010  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1011  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1012 
1013  const int M ( numeric_cast<int>( A.rows() ) );
1014  const int N ( numeric_cast<int>( A.columns() ) );
1015  const int lda( numeric_cast<int>( A.spacing() ) );
1016 
1017  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1018  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1019  }
1021 #endif
1022  //**********************************************************************************************
1023 
1024  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1025 #if BLAZE_BLAS_MODE
1026 
1039  template< typename VT1 // Type of the left-hand side target vector
1040  , typename MT1 // Type of the left-hand side matrix operand
1041  , typename VT2 > // Type of the right-hand side vector operand
1042  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1043  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1044  {
1045  using boost::numeric_cast;
1046 
1047  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1048  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1049  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1050  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1051  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1052  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1053 
1054  const int M ( numeric_cast<int>( A.rows() ) );
1055  const int N ( numeric_cast<int>( A.columns() ) );
1056  const int lda( numeric_cast<int>( A.spacing() ) );
1057  const complex<float> alpha( 1.0F, 0.0F );
1058  const complex<float> beta ( 1.0F, 0.0F );
1059 
1060  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1061  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1062  }
1064 #endif
1065  //**********************************************************************************************
1066 
1067  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1068 #if BLAZE_BLAS_MODE
1069 
1082  template< typename VT1 // Type of the left-hand side target vector
1083  , typename MT1 // Type of the left-hand side matrix operand
1084  , typename VT2 > // Type of the right-hand side vector operand
1085  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1086  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1087  {
1088  using boost::numeric_cast;
1089 
1090  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1091  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1092  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1093  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1094  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1095  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1096 
1097  const int M ( numeric_cast<int>( A.rows() ) );
1098  const int N ( numeric_cast<int>( A.columns() ) );
1099  const int lda( numeric_cast<int>( A.spacing() ) );
1100  const complex<double> alpha( 1.0, 0.0 );
1101  const complex<double> beta ( 1.0, 0.0 );
1102 
1103  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1104  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1105  }
1107 #endif
1108  //**********************************************************************************************
1109 
1110  //**Addition assignment to sparse vectors*******************************************************
1111  // No special implementation for the addition assignment to sparse vectors.
1112  //**********************************************************************************************
1113 
1114  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the matrix-vector multiplication expression to a dense vector:
// subtracts the product of rhs.mat_ and rhs.vec_ from lhs.
//   lhs : target dense vector; its size must equal rhs.size() (checked by internal assertion)
//   rhs : the multiplication expression to be subtracted
// NOTE(review): the embedded listing numbers skip line 1130, so a statement of the original
// source may be missing from this view.
1127  template< typename VT1 > // Type of the target dense vector
1128  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1129  {
1131 
1132  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1133 
// An empty matrix operand contributes nothing, so the target is left untouched.
1134  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1135  return;
1136  }
1137 
1138  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1139  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1140 
1141  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1142  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1143  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1144  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1145 
// Kernel dispatch: the default kernel is used when the matrix operand is a computation that
// is not flagged for evaluation, or when the matrix has fewer than TDMATDVECMULT_THRESHOLD
// elements; every other case goes through the BLAS kernel selection.
1146  if( ( IsComputation<MT>::value && !evaluate ) ||
1147  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1148  TDMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1149  else
1150  TDMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1151  }
1153  //**********************************************************************************************
1154 
1155  //**Default subtraction assignment to dense vectors*********************************************
1169  template< typename VT1 // Type of the left-hand side target vector
1170  , typename MT1 // Type of the left-hand side matrix operand
1171  , typename VT2 > // Type of the right-hand side vector operand
1172  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1173  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1174  {
1175  const size_t M( A.rows() );
1176  const size_t N( A.columns() );
1177 
1178  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1179  const size_t iend( M & size_t(-2) );
1180 
1181  for( size_t j=0UL; j<N; ++j ) {
1182  for( size_t i=0UL; i<iend; i+=2UL ) {
1183  y[i ] -= x[j] * A(i ,j);
1184  y[i+1UL] -= x[j] * A(i+1UL,j);
1185  }
1186  if( iend < M ) {
1187  y[iend] -= x[j] * A(iend,j);
1188  }
1189  }
1190  }
1192  //**********************************************************************************************
1193 
1194  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment of the matrix-vector product (y -= A * x).
//   y : target left-hand side dense vector
//   A : left-hand side dense matrix operand
//   x : right-hand side dense vector operand
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s). For each
// block the current values of y are loaded, the contributions of all N columns are subtracted,
// and the result is stored back to y.
1208  template< typename VT1 // Type of the left-hand side target vector
1209  , typename MT1 // Type of the left-hand side matrix operand
1210  , typename VT2 > // Type of the right-hand side vector operand
1211  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1212  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1213  {
1214  typedef IntrinsicTrait<ElementType> IT;
1215 
1216  const size_t M( A.rows() );
1217  const size_t N( A.columns() );
1218 
// Row index shared by all block loops below; each loop continues where the previous one stopped.
1219  size_t i( 0UL );
1220 
// Blocks of 8 intrinsic vectors (8*IT::size rows per iteration)
1221  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1222  IntrinsicType xmm1( load( &y[i ] ) );
1223  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1224  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1225  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1226  IntrinsicType xmm5( load( &y[i+IT::size*4UL] ) );
1227  IntrinsicType xmm6( load( &y[i+IT::size*5UL] ) );
1228  IntrinsicType xmm7( load( &y[i+IT::size*6UL] ) );
1229  IntrinsicType xmm8( load( &y[i+IT::size*7UL] ) );
1230  for( size_t j=0UL; j<N; ++j ) {
// x[j] is passed through set() once per column and reused for all eight row segments
1231  const IntrinsicType x1( set( x[j] ) );
1232  xmm1 = xmm1 - A.get(i ,j) * x1;
1233  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1234  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1235  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1236  xmm5 = xmm5 - A.get(i+IT::size*4UL,j) * x1;
1237  xmm6 = xmm6 - A.get(i+IT::size*5UL,j) * x1;
1238  xmm7 = xmm7 - A.get(i+IT::size*6UL,j) * x1;
1239  xmm8 = xmm8 - A.get(i+IT::size*7UL,j) * x1;
1240  }
1241  store( &y[i ], xmm1 );
1242  store( &y[i+IT::size ], xmm2 );
1243  store( &y[i+IT::size*2UL], xmm3 );
1244  store( &y[i+IT::size*3UL], xmm4 );
1245  store( &y[i+IT::size*4UL], xmm5 );
1246  store( &y[i+IT::size*5UL], xmm6 );
1247  store( &y[i+IT::size*6UL], xmm7 );
1248  store( &y[i+IT::size*7UL], xmm8 );
1249  }
// Blocks of 4 intrinsic vectors
1250  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1251  IntrinsicType xmm1( load( &y[i ] ) );
1252  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1253  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1254  IntrinsicType xmm4( load( &y[i+IT::size*3UL] ) );
1255  for( size_t j=0UL; j<N; ++j ) {
1256  const IntrinsicType x1( set( x[j] ) );
1257  xmm1 = xmm1 - A.get(i ,j) * x1;
1258  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1259  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1260  xmm4 = xmm4 - A.get(i+IT::size*3UL,j) * x1;
1261  }
1262  store( &y[i ], xmm1 );
1263  store( &y[i+IT::size ], xmm2 );
1264  store( &y[i+IT::size*2UL], xmm3 );
1265  store( &y[i+IT::size*3UL], xmm4 );
1266  }
// Blocks of 3 intrinsic vectors
1267  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1268  IntrinsicType xmm1( load( &y[i ] ) );
1269  IntrinsicType xmm2( load( &y[i+IT::size ] ) );
1270  IntrinsicType xmm3( load( &y[i+IT::size*2UL] ) );
1271  for( size_t j=0UL; j<N; ++j ) {
1272  const IntrinsicType x1( set( x[j] ) );
1273  xmm1 = xmm1 - A.get(i ,j) * x1;
1274  xmm2 = xmm2 - A.get(i+IT::size ,j) * x1;
1275  xmm3 = xmm3 - A.get(i+IT::size*2UL,j) * x1;
1276  }
1277  store( &y[i ], xmm1 );
1278  store( &y[i+IT::size ], xmm2 );
1279  store( &y[i+IT::size*2UL], xmm3 );
1280  }
// Blocks of 2 intrinsic vectors
1281  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1282  IntrinsicType xmm1( load( &y[i ] ) );
1283  IntrinsicType xmm2( load( &y[i+IT::size] ) );
1284  for( size_t j=0UL; j<N; ++j ) {
1285  const IntrinsicType x1( set( x[j] ) );
1286  xmm1 = xmm1 - A.get(i ,j) * x1;
1287  xmm2 = xmm2 - A.get(i+IT::size,j) * x1;
1288  }
1289  store( &y[i ], xmm1 );
1290  store( &y[i+IT::size], xmm2 );
1291  }
// Remaining rows: a single intrinsic vector is loaded from and stored to y[i].
// NOTE(review): presumably relies on y and A being padded to a multiple of IT::size
// when fewer than IT::size rows remain - confirm against the vector/matrix implementations.
1292  if( i < M ) {
1293  IntrinsicType xmm1( load( &y[i] ) );
1294  for( size_t j=0UL; j<N; ++j ) {
1295  xmm1 = xmm1 - A.get(i,j) * set( x[j] );
1296  }
1297  store( &y[i], xmm1 );
1298  }
1299  }
1301  //**********************************************************************************************
1302 
1303  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1317  template< typename VT1 // Type of the left-hand side target vector
1318  , typename MT1 // Type of the left-hand side matrix operand
1319  , typename VT2 > // Type of the right-hand side vector operand
1320  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1321  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1322  {
1323  selectDefaultSubAssignKernel( y, A, x );
1324  }
1326  //**********************************************************************************************
1327 
1328  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1329 #if BLAZE_BLAS_MODE
1330 
1343  template< typename VT1 // Type of the left-hand side target vector
1344  , typename MT1 // Type of the left-hand side matrix operand
1345  , typename VT2 > // Type of the right-hand side vector operand
1346  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1347  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1348  {
1349  using boost::numeric_cast;
1350 
1351  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1352  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1353  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1354 
1355  const int M ( numeric_cast<int>( A.rows() ) );
1356  const int N ( numeric_cast<int>( A.columns() ) );
1357  const int lda( numeric_cast<int>( A.spacing() ) );
1358 
1359  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1360  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1361  }
1363 #endif
1364  //**********************************************************************************************
1365 
1366  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1367 #if BLAZE_BLAS_MODE
1368 
1381  template< typename VT1 // Type of the left-hand side target vector
1382  , typename MT1 // Type of the left-hand side matrix operand
1383  , typename VT2 > // Type of the right-hand side vector operand
1384  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1385  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1386  {
1387  using boost::numeric_cast;
1388 
1389  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1390  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1391  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1392 
1393  const int M ( numeric_cast<int>( A.rows() ) );
1394  const int N ( numeric_cast<int>( A.columns() ) );
1395  const int lda( numeric_cast<int>( A.spacing() ) );
1396 
1397  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1398  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1399  }
1401 #endif
1402  //**********************************************************************************************
1403 
1404  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1405 #if BLAZE_BLAS_MODE
1406 
1419  template< typename VT1 // Type of the left-hand side target vector
1420  , typename MT1 // Type of the left-hand side matrix operand
1421  , typename VT2 > // Type of the right-hand side vector operand
1422  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1423  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1424  {
1425  using boost::numeric_cast;
1426 
1427  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1428  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1429  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1430  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1431  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1432  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1433 
1434  const int M ( numeric_cast<int>( A.rows() ) );
1435  const int N ( numeric_cast<int>( A.columns() ) );
1436  const int lda( numeric_cast<int>( A.spacing() ) );
1437  const complex<float> alpha( -1.0F, 0.0F );
1438  const complex<float> beta ( 1.0F, 0.0F );
1439 
1440  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1441  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1442  }
1444 #endif
1445  //**********************************************************************************************
1446 
1447  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1448 #if BLAZE_BLAS_MODE
1449 
1462  template< typename VT1 // Type of the left-hand side target vector
1463  , typename MT1 // Type of the left-hand side matrix operand
1464  , typename VT2 > // Type of the right-hand side vector operand
1465  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1466  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1467  {
1468  using boost::numeric_cast;
1469 
1470  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1471  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1472  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1473  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1474  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1475  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1476 
1477  const int M ( numeric_cast<int>( A.rows() ) );
1478  const int N ( numeric_cast<int>( A.columns() ) );
1479  const int lda( numeric_cast<int>( A.spacing() ) );
1480  const complex<double> alpha( -1.0, 0.0 );
1481  const complex<double> beta ( 1.0, 0.0 );
1482 
1483  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1484  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1485  }
1487 #endif
1488  //**********************************************************************************************
1489 
1490  //**Subtraction assignment to sparse vectors****************************************************
1491  // No special implementation for the subtraction assignment to sparse vectors.
1492  //**********************************************************************************************
1493 
1494  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the matrix-vector multiplication expression to a dense vector.
//   lhs : target dense vector; its size must equal rhs.size() (checked by internal assertion)
//   rhs : the multiplication expression acting as right-hand side of the multiplication
// The expression is first evaluated into a temporary of type ResultType, which is then
// forwarded to the multAssign overload for that type.
// NOTE(review): the embedded listing numbers skip lines 1510, 1512 and 1513, so statements of
// the original source may be missing from this view.
1507  template< typename VT1 > // Type of the target dense vector
1508  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1509  {
1511 
1514  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1515 
1516  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1517 
// Evaluate the complete product once before combining it with the target
1518  const ResultType tmp( rhs );
1519  multAssign( ~lhs, tmp );
1520  }
1522  //**********************************************************************************************
1523 
1524  //**Multiplication assignment to sparse vectors*************************************************
1525  // No special implementation for the multiplication assignment to sparse vectors.
1526  //**********************************************************************************************
1527 
1528  //**Compile time checks*************************************************************************
1535  //**********************************************************************************************
1536 };
1537 //*************************************************************************************************
1538 
1539 
1540 
1541 
1542 //=================================================================================================
1543 //
1544 // DVECSCALARMULTEXPR SPECIALIZATION
1545 //
1546 //=================================================================================================
1547 
1548 //*************************************************************************************************
1557 template< typename MT // Type of the left-hand side dense matrix
1558  , typename VT // Type of the right-hand side dense vector
1559  , typename ST > // Type of the right-hand side scalar value
1560 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1561  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1562  , private VecScalarMultExpr
1563  , private Computation
1564 {
1565  private:
1566  //**Type definitions****************************************************************************
// Shorthand types used throughout this specialization.
// MVM: the wrapped matrix-vector multiplication expression type
1567  typedef TDMatDVecMultExpr<MT,VT> MVM;
// RES: result type of the matrix-vector multiplication expression
1568  typedef typename MVM::ResultType RES;
// MRT / VRT: result types of the matrix and the vector operand
1569  typedef typename MT::ResultType MRT;
1570  typedef typename VT::ResultType VRT;
// MET / VET: element types of the matrix and the vector result types
1571  typedef typename MRT::ElementType MET;
1572  typedef typename VRT::ElementType VET;
// MCT / VCT: composite types of the matrix and the vector operand
1573  typedef typename MT::CompositeType MCT;
1574  typedef typename VT::CompositeType VCT;
1575  //**********************************************************************************************
1576 
1577  //**********************************************************************************************
// Compilation switch for the evaluation of the matrix operand. It is nonzero only if MT is a
// non-vectorizable computation whose element type equals the vector element type and is
// BLAS compatible. The switch is the selector of the LT typedef in this class and part of
// the kernel dispatch condition in assign().
1579  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1580  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1581  //**********************************************************************************************
1582 
1583  //**********************************************************************************************
1585 
1588  template< typename T1, typename T2, typename T3, typename T4 >
1589  struct UseSinglePrecisionKernel {
1590  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1591  IsFloat<typename T1::ElementType>::value &&
1592  IsFloat<typename T2::ElementType>::value &&
1593  IsFloat<typename T3::ElementType>::value &&
1594  !IsComplex<T4>::value };
1595  };
1596  //**********************************************************************************************
1597 
1598  //**********************************************************************************************
1600 
1603  template< typename T1, typename T2, typename T3, typename T4 >
1604  struct UseDoublePrecisionKernel {
1605  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1606  IsDouble<typename T1::ElementType>::value &&
1607  IsDouble<typename T2::ElementType>::value &&
1608  IsDouble<typename T3::ElementType>::value &&
1609  !IsComplex<T4>::value };
1610  };
1611  //**********************************************************************************************
1612 
1613  //**********************************************************************************************
1615 
1618  template< typename T1, typename T2, typename T3 >
1619  struct UseSinglePrecisionComplexKernel {
1620  typedef complex<float> Type;
1621  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1622  IsSame<typename T1::ElementType,Type>::value &&
1623  IsSame<typename T2::ElementType,Type>::value &&
1624  IsSame<typename T3::ElementType,Type>::value };
1625  };
1626  //**********************************************************************************************
1627 
1628  //**********************************************************************************************
1630 
1633  template< typename T1, typename T2, typename T3 >
1634  struct UseDoublePrecisionComplexKernel {
1635  typedef complex<double> Type;
1636  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1637  IsSame<typename T1::ElementType,Type>::value &&
1638  IsSame<typename T2::ElementType,Type>::value &&
1639  IsSame<typename T3::ElementType,Type>::value };
1640  };
1641  //**********************************************************************************************
1642 
1643  //**********************************************************************************************
1645 
1647  template< typename T1, typename T2, typename T3, typename T4 >
1648  struct UseDefaultKernel {
1649  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1650  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1651  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1652  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1653  };
1654  //**********************************************************************************************
1655 
1656  //**********************************************************************************************
1658 
1661  template< typename T1, typename T2, typename T3, typename T4 >
1662  struct UseVectorizedDefaultKernel {
1663  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1664  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1665  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1666  IsSame<typename T1::ElementType,T4>::value &&
1667  IntrinsicTrait<typename T1::ElementType>::addition &&
1668  IntrinsicTrait<typename T1::ElementType>::multiplication };
1669  };
1670  //**********************************************************************************************
1671 
1672  public:
1673  //**Type definitions****************************************************************************
// Type of this DVecScalarMultExpr specialization
1674  typedef DVecScalarMultExpr<MVM,ST,false> This;
// Result type of the scaled expression, determined by MultTrait from RES and ST
1675  typedef typename MultTrait<RES,ST>::Type ResultType;
// Transpose type and element type taken from the result type
1676  typedef typename ResultType::TransposeType TransposeType;
1677  typedef typename ResultType::ElementType ElementType;
// Intrinsic type associated with the element type via IntrinsicTrait
1678  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// Return type of the subscript operator (elements are returned by value)
1679  typedef const ElementType ReturnType;
// Type used when this expression appears as operand of another expression
1680  typedef const ResultType CompositeType;
1681 
// Type of the left-hand side operand: the matrix-vector multiplication expression
1683  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
1684 
// Type of the right-hand side operand: the scalar
1686  typedef ST RightOperand;
1687 
// Type for the local matrix operand in the assign functions, chosen between const MRT and MCT
// by the evaluate switch (presumably const MRT when evaluate is nonzero - confirm against SelectType)
1689  typedef typename SelectType< evaluate, const MRT, MCT >::Type LT;
1690 
// Type for the local vector operand in the assign functions, chosen between const VRT and VCT
// depending on whether VT is a computation (same SelectType caveat as for LT)
1692  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type RT;
1693  //**********************************************************************************************
1694 
1695  //**Compilation flags***************************************************************************
// This expression type is flagged as not vectorizable
1697  enum { vectorizable = 0 };
1698  //**********************************************************************************************
1699 
1700  //**Constructor*********************************************************************************
1706  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1707  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1708  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1709  {}
1710  //**********************************************************************************************
1711 
1712  //**Subscript operator**************************************************************************
1718  inline ReturnType operator[]( size_t index ) const {
1719  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1720  return vector_[index] * scalar_;
1721  }
1722  //**********************************************************************************************
1723 
1724  //**Size function*******************************************************************************
1729  inline size_t size() const {
1730  return vector_.size();
1731  }
1732  //**********************************************************************************************
1733 
1734  //**Left operand access*************************************************************************
1739  inline LeftOperand leftOperand() const {
1740  return vector_;
1741  }
1742  //**********************************************************************************************
1743 
1744  //**Right operand access************************************************************************
1749  inline RightOperand rightOperand() const {
1750  return scalar_;
1751  }
1752  //**********************************************************************************************
1753 
1754  //**********************************************************************************************
1760  template< typename T >
1761  inline bool canAlias( const T* alias ) const {
1762  return vector_.canAlias( alias );
1763  }
1764  //**********************************************************************************************
1765 
1766  //**********************************************************************************************
1772  template< typename T >
1773  inline bool isAliased( const T* alias ) const {
1774  return vector_.isAliased( alias );
1775  }
1776  //**********************************************************************************************
1777 
1778  private:
1779  //**Member variables****************************************************************************
// Left-hand side operand: the matrix-vector multiplication expression, set by the constructor
1780  LeftOperand vector_;
// Right-hand side operand: the scalar factor, set by the constructor
1781  RightOperand scalar_;
1782  //**********************************************************************************************
1783 
1784  //**Assignment to dense vectors*****************************************************************
// Assignment of the scaled matrix-vector multiplication expression to a dense vector.
//   lhs : target dense vector; its size must equal rhs.size() (checked by internal assertion)
//   rhs : the scaled multiplication expression to be assigned
// NOTE(review): the embedded listing numbers skip line 1799, so a statement of the original
// source may be missing from this view.
1796  template< typename VT1 > // Type of the target dense vector
1797  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1798  {
1800 
1801  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1802 
// Matrix and vector operand of the wrapped multiplication expression
1803  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1804  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1805 
// A matrix without rows leaves nothing to assign; a matrix without columns resets the target
1806  if( left.rows() == 0UL ) {
1807  return;
1808  }
1809  else if( left.columns() == 0UL ) {
1810  reset( ~lhs );
1811  return;
1812  }
1813 
1814  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1815  RT x( right ); // Evaluation of the right-hand side dense vector operand
1816 
1817  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1818  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1819  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1820  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1821 
// Kernel dispatch: the default kernel is used when the matrix operand is a computation that
// is not flagged for evaluation, or when the matrix has fewer than TDMATDVECMULT_THRESHOLD
// elements; every other case goes through the BLAS kernel selection.
1822  if( ( IsComputation<MT>::value && !evaluate ) ||
1823  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1824  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1825  else
1826  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1827  }
1828  //**********************************************************************************************
1829 
1830  //**Default assignment to dense vectors*********************************************************
1844  template< typename VT1 // Type of the left-hand side target vector
1845  , typename MT1 // Type of the left-hand side matrix operand
1846  , typename VT2 // Type of the right-hand side vector operand
1847  , typename ST2 > // Type of the scalar value
1848  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1849  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1850  {
1851  const size_t M( A.rows() );
1852  const size_t N( A.columns() );
1853 
1854  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1855  const size_t iend( M & size_t(-2) );
1856 
1857  for( size_t i=0UL; i<M; ++i ) {
1858  y[i] = x[0UL] * A(i,0UL);
1859  }
1860  for( size_t j=1UL; j<N; ++j ) {
1861  for( size_t i=0UL; i<iend; i+=2UL ) {
1862  y[i ] += x[j] * A(i ,j);
1863  y[i+1UL] += x[j] * A(i+1UL,j);
1864  }
1865  if( iend < M ) {
1866  y[iend] += x[j] * A(iend,j);
1867  }
1868  }
1869  for( size_t i=0UL; i<M; ++i ) {
1870  y[i] *= scalar;
1871  }
1872  }
1873  //**********************************************************************************************
1874 
1875  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment of the scaled matrix-vector product (y = scalar * A * x).
//   y      : target left-hand side dense vector
//   A      : left-hand side dense matrix operand
//   x      : right-hand side dense vector operand
//   scalar : scaling factor
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s). For each
// block the contributions of all N columns are accumulated, multiplied by the scalar factor,
// and stored to y.
// NOTE(review): the xmm accumulators are default-constructed and then read in the first
// accumulation step; this presumably relies on IntrinsicType's default constructor
// zero-initializing the value - confirm in blaze/math/Intrinsics.h.
1889  template< typename VT1 // Type of the left-hand side target vector
1890  , typename MT1 // Type of the left-hand side matrix operand
1891  , typename VT2 // Type of the right-hand side vector operand
1892  , typename ST2 > // Type of the scalar value
1893  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1894  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1895  {
1896  typedef IntrinsicTrait<ElementType> IT;
1897 
1898  const size_t M( A.rows() );
1899  const size_t N( A.columns() );
1900 
// The scalar is passed through set() once and applied to every block right before the store
1901  const IntrinsicType factor( set( scalar ) );
1902 
// Row index shared by all block loops below; each loop continues where the previous one stopped.
1903  size_t i( 0UL );
1904 
// Blocks of 8 intrinsic vectors (8*IT::size rows per iteration)
1905  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1906  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1907  for( size_t j=0UL; j<N; ++j ) {
1908  const IntrinsicType x1( set( x[j] ) );
1909  xmm1 = xmm1 + A.get(i ,j) * x1;
1910  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1911  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1912  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1913  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
1914  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
1915  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
1916  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
1917  }
1918  store( &y[i ], xmm1*factor );
1919  store( &y[i+IT::size ], xmm2*factor );
1920  store( &y[i+IT::size*2UL], xmm3*factor );
1921  store( &y[i+IT::size*3UL], xmm4*factor );
1922  store( &y[i+IT::size*4UL], xmm5*factor );
1923  store( &y[i+IT::size*5UL], xmm6*factor );
1924  store( &y[i+IT::size*6UL], xmm7*factor );
1925  store( &y[i+IT::size*7UL], xmm8*factor );
1926  }
// Blocks of 4 intrinsic vectors
1927  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1928  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1929  for( size_t j=0UL; j<N; ++j ) {
1930  const IntrinsicType x1( set( x[j] ) );
1931  xmm1 = xmm1 + A.get(i ,j) * x1;
1932  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1933  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1934  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
1935  }
1936  store( &y[i ], xmm1*factor );
1937  store( &y[i+IT::size ], xmm2*factor );
1938  store( &y[i+IT::size*2UL], xmm3*factor );
1939  store( &y[i+IT::size*3UL], xmm4*factor );
1940  }
// Blocks of 3 intrinsic vectors
1941  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1942  IntrinsicType xmm1, xmm2, xmm3;
1943  for( size_t j=0UL; j<N; ++j ) {
1944  const IntrinsicType x1( set( x[j] ) );
1945  xmm1 = xmm1 + A.get(i ,j) * x1;
1946  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
1947  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
1948  }
1949  store( &y[i ], xmm1*factor );
1950  store( &y[i+IT::size ], xmm2*factor );
1951  store( &y[i+IT::size*2UL], xmm3*factor );
1952  }
// Blocks of 2 intrinsic vectors
1953  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1954  IntrinsicType xmm1, xmm2;
1955  for( size_t j=0UL; j<N; ++j ) {
1956  const IntrinsicType x1( set( x[j] ) );
1957  xmm1 = xmm1 + A.get(i ,j) * x1;
1958  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
1959  }
1960  store( &y[i ], xmm1*factor );
1961  store( &y[i+IT::size], xmm2*factor );
1962  }
// Remaining rows: a single intrinsic vector is stored to y[i].
// NOTE(review): presumably relies on y and A being padded to a multiple of IT::size
// when fewer than IT::size rows remain - confirm against the vector/matrix implementations.
1963  if( i < M ) {
1964  IntrinsicType xmm1;
1965  for( size_t j=0UL; j<N; ++j ) {
1966  const IntrinsicType x1( set( x[j] ) );
1967  xmm1 = xmm1 + A.get(i,j) * x1;
1968  }
1969  store( &y[i], xmm1*factor );
1970  }
1971  }
1972  //**********************************************************************************************
1973 
1974  //**BLAS-based assignment to dense vectors (default)********************************************
1988  template< typename VT1 // Type of the left-hand side target vector
1989  , typename MT1 // Type of the left-hand side matrix operand
1990  , typename VT2 // Type of the right-hand side vector operand
1991  , typename ST2 > // Type of the scalar value
1992  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1993  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1994  {
1995  selectDefaultAssignKernel( y, A, x, scalar );
1996  }
1997  //**********************************************************************************************
1998 
1999  //**BLAS-based assignment to dense vectors (single precision)***********************************
2000 #if BLAZE_BLAS_MODE
2001 
2014  template< typename VT1 // Type of the left-hand side target vector
2015  , typename MT1 // Type of the left-hand side matrix operand
2016  , typename VT2 // Type of the right-hand side vector operand
2017  , typename ST2 > // Type of the scalar value
2018  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2019  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2020  {
2021  using boost::numeric_cast;
2022 
2023  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2024  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2025  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2026 
2027  const int M ( numeric_cast<int>( A.rows() ) );
2028  const int N ( numeric_cast<int>( A.columns() ) );
2029  const int lda( numeric_cast<int>( A.spacing() ) );
2030 
2031  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2032  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2033  }
2034 #endif
2035  //**********************************************************************************************
2036 
2037  //**BLAS-based assignment to dense vectors (double precision)***********************************
2038 #if BLAZE_BLAS_MODE
2039 
2052  template< typename VT1 // Type of the left-hand side target vector
2053  , typename MT1 // Type of the left-hand side matrix operand
2054  , typename VT2 // Type of the right-hand side vector operand
2055  , typename ST2 > // Type of the scalar value
2056  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2057  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2058  {
2059  using boost::numeric_cast;
2060 
2061  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2062  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2063  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2064 
2065  const int M ( numeric_cast<int>( A.rows() ) );
2066  const int N ( numeric_cast<int>( A.columns() ) );
2067  const int lda( numeric_cast<int>( A.spacing() ) );
2068 
2069  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2070  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2071  }
2072 #endif
2073  //**********************************************************************************************
2074 
2075  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2076 #if BLAZE_BLAS_MODE
2077 
2090  template< typename VT1 // Type of the left-hand side target vector
2091  , typename MT1 // Type of the left-hand side matrix operand
2092  , typename VT2 // Type of the right-hand side vector operand
2093  , typename ST2 > // Type of the scalar value
2094  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2095  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2096  {
2097  using boost::numeric_cast;
2098 
2099  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2100  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2101  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2102  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2103  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2104  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2105 
2106  const int M ( numeric_cast<int>( A.rows() ) );
2107  const int N ( numeric_cast<int>( A.columns() ) );
2108  const int lda( numeric_cast<int>( A.spacing() ) );
2109  const complex<float> alpha( scalar );
2110  const complex<float> beta ( 0.0F, 0.0F );
2111 
2112  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2113  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2114  }
2115 #endif
2116  //**********************************************************************************************
2117 
2118  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2119 #if BLAZE_BLAS_MODE
2120 
2133  template< typename VT1 // Type of the left-hand side target vector
2134  , typename MT1 // Type of the left-hand side matrix operand
2135  , typename VT2 // Type of the right-hand side vector operand
2136  , typename ST2 > // Type of the scalar value
2137  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2138  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2139  {
2140  using boost::numeric_cast;
2141 
2142  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2143  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2144  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2145  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2146  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2147  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2148 
2149  const int M ( numeric_cast<int>( A.rows() ) );
2150  const int N ( numeric_cast<int>( A.columns() ) );
2151  const int lda( numeric_cast<int>( A.spacing() ) );
2152  const complex<double> alpha( scalar );
2153  const complex<double> beta ( 0.0, 0.0 );
2154 
2155  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2156  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2157  }
2158 #endif
2159  //**********************************************************************************************
2160 
2161  //**Assignment to sparse vectors****************************************************************
2173  template< typename VT1 > // Type of the target sparse vector
2174  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2175  {
2177 
2180  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2181 
2182  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2183 
2184  const ResultType tmp( rhs );
2185  assign( ~lhs, tmp );
2186  }
2187  //**********************************************************************************************
2188 
2189  //**Addition assignment to dense vectors********************************************************
2201  template< typename VT1 > // Type of the target dense vector
2202  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2203  {
2205 
2206  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2207 
2208  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2209  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2210 
2211  if( left.rows() == 0UL || left.columns() == 0UL ) {
2212  return;
2213  }
2214 
2215  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2216  RT x( right ); // Evaluation of the right-hand side dense vector operand
2217 
2218  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2219  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2220  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2221  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2222 
2223  if( ( IsComputation<MT>::value && !evaluate ) ||
2224  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2225  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2226  else
2227  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2228  }
2229  //**********************************************************************************************
2230 
2231  //**Default addition assignment to dense vectors************************************************
2245  template< typename VT1 // Type of the left-hand side target vector
2246  , typename MT1 // Type of the left-hand side matrix operand
2247  , typename VT2 // Type of the right-hand side vector operand
2248  , typename ST2 > // Type of the scalar value
2249  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2250  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2251  {
2252  y.addAssign( A * x * scalar );
2253  }
2254  //**********************************************************************************************
2255 
2256  //**Vectorized default addition assignment to dense vectors*************************************
2270  template< typename VT1 // Type of the left-hand side target vector
2271  , typename MT1 // Type of the left-hand side matrix operand
2272  , typename VT2 // Type of the right-hand side vector operand
2273  , typename ST2 > // Type of the scalar value
2274  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2275  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2276  {
2277  typedef IntrinsicTrait<ElementType> IT;
2278 
2279  const size_t M( A.rows() );
2280  const size_t N( A.columns() );
2281 
2282  const IntrinsicType factor( set( scalar ) );
2283 
2284  size_t i( 0UL );
2285 
2286  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2287  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2288  for( size_t j=0UL; j<N; ++j ) {
2289  const IntrinsicType x1( set( x[j] ) );
2290  xmm1 = xmm1 + A.get(i ,j) * x1;
2291  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2292  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2293  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2294  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2295  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2296  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2297  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2298  }
2299  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2300  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2301  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2302  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2303  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) + xmm5*factor );
2304  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) + xmm6*factor );
2305  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) + xmm7*factor );
2306  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) + xmm8*factor );
2307  }
2308  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2309  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2310  for( size_t j=0UL; j<N; ++j ) {
2311  const IntrinsicType x1( set( x[j] ) );
2312  xmm1 = xmm1 + A.get(i ,j) * x1;
2313  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2314  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2315  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2316  }
2317  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2318  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2319  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2320  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) + xmm4*factor );
2321  }
2322  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2323  IntrinsicType xmm1, xmm2, xmm3;
2324  for( size_t j=0UL; j<N; ++j ) {
2325  const IntrinsicType x1( set( x[j] ) );
2326  xmm1 = xmm1 + A.get(i ,j) * x1;
2327  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2328  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2329  }
2330  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2331  store( &y[i+IT::size ], load( &y[i+IT::size ] ) + xmm2*factor );
2332  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) + xmm3*factor );
2333  }
2334  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2335  IntrinsicType xmm1, xmm2;
2336  for( size_t j=0UL; j<N; ++j ) {
2337  const IntrinsicType x1( set( x[j] ) );
2338  xmm1 = xmm1 + A.get(i ,j) * x1;
2339  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2340  }
2341  store( &y[i ], load( &y[i ] ) + xmm1*factor );
2342  store( &y[i+IT::size], load( &y[i+IT::size] ) + xmm2*factor );
2343  }
2344  if( i < M ) {
2345  IntrinsicType xmm1;
2346  for( size_t j=0UL; j<N; ++j ) {
2347  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2348  }
2349  store( &y[i], load( &y[i] ) + xmm1*factor );
2350  }
2351  }
2352  //**********************************************************************************************
2353 
2354  //**BLAS-based addition assignment to dense vectors (default)***********************************
2368  template< typename VT1 // Type of the left-hand side target vector
2369  , typename MT1 // Type of the left-hand side matrix operand
2370  , typename VT2 // Type of the right-hand side vector operand
2371  , typename ST2 > // Type of the scalar value
2372  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2373  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2374  {
2375  selectDefaultAddAssignKernel( y, A, x, scalar );
2376  }
2377  //**********************************************************************************************
2378 
2379  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2380 #if BLAZE_BLAS_MODE
2381 
2394  template< typename VT1 // Type of the left-hand side target vector
2395  , typename MT1 // Type of the left-hand side matrix operand
2396  , typename VT2 // Type of the right-hand side vector operand
2397  , typename ST2 > // Type of the scalar value
2398  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2399  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2400  {
2401  using boost::numeric_cast;
2402 
2403  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2404  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2405  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2406 
2407  const int M ( numeric_cast<int>( A.rows() ) );
2408  const int N ( numeric_cast<int>( A.columns() ) );
2409  const int lda( numeric_cast<int>( A.spacing() ) );
2410 
2411  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2412  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2413  }
2414 #endif
2415  //**********************************************************************************************
2416 
2417  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2418 #if BLAZE_BLAS_MODE
2419 
2432  template< typename VT1 // Type of the left-hand side target vector
2433  , typename MT1 // Type of the left-hand side matrix operand
2434  , typename VT2 // Type of the right-hand side vector operand
2435  , typename ST2 > // Type of the scalar value
2436  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2437  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2438  {
2439  using boost::numeric_cast;
2440 
2441  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2442  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2443  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2444 
2445  const int M ( numeric_cast<int>( A.rows() ) );
2446  const int N ( numeric_cast<int>( A.columns() ) );
2447  const int lda( numeric_cast<int>( A.spacing() ) );
2448 
2449  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2450  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2451  }
2452 #endif
2453  //**********************************************************************************************
2454 
2455  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2456 #if BLAZE_BLAS_MODE
2457 
2470  template< typename VT1 // Type of the left-hand side target vector
2471  , typename MT1 // Type of the left-hand side matrix operand
2472  , typename VT2 // Type of the right-hand side vector operand
2473  , typename ST2 > // Type of the scalar value
2474  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2475  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2476  {
2477  using boost::numeric_cast;
2478 
2479  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2480  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2481  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2483  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2484  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2485 
2486  const int M ( numeric_cast<int>( A.rows() ) );
2487  const int N ( numeric_cast<int>( A.columns() ) );
2488  const int lda( numeric_cast<int>( A.spacing() ) );
2489  const complex<float> alpha( scalar );
2490  const complex<float> beta ( 1.0F, 0.0F );
2491 
2492  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2493  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2494  }
2495 #endif
2496  //**********************************************************************************************
2497 
2498  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2499 #if BLAZE_BLAS_MODE
2500 
2513  template< typename VT1 // Type of the left-hand side target vector
2514  , typename MT1 // Type of the left-hand side matrix operand
2515  , typename VT2 // Type of the right-hand side vector operand
2516  , typename ST2 > // Type of the scalar value
2517  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2518  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2519  {
2520  using boost::numeric_cast;
2521 
2522  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2523  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2524  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2525  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2526  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2527  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2528 
2529  const int M ( numeric_cast<int>( A.rows() ) );
2530  const int N ( numeric_cast<int>( A.columns() ) );
2531  const int lda( numeric_cast<int>( A.spacing() ) );
2532  const complex<double> alpha( scalar );
2533  const complex<double> beta ( 1.0, 0.0 );
2534 
2535  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2536  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2537  }
2538 #endif
2539  //**********************************************************************************************
2540 
2541  //**Addition assignment to sparse vectors*******************************************************
2542  // No special implementation for the addition assignment to sparse vectors.
2543  //**********************************************************************************************
2544 
2545  //**Subtraction assignment to dense vectors*****************************************************
2557  template< typename VT1 > // Type of the target dense vector
2558  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2559  {
2561 
2562  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2563 
2564  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2565  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2566 
2567  if( left.rows() == 0UL || left.columns() == 0UL ) {
2568  return;
2569  }
2570 
2571  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2572  RT x( right ); // Evaluation of the right-hand side dense vector operand
2573 
2574  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2575  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2576  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2577  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2578 
2579  if( ( IsComputation<MT>::value && !evaluate ) ||
2580  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2581  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2582  else
2583  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2584  }
2585  //**********************************************************************************************
2586 
2587  //**Default subtraction assignment to dense vectors*********************************************
2601  template< typename VT1 // Type of the left-hand side target vector
2602  , typename MT1 // Type of the left-hand side matrix operand
2603  , typename VT2 // Type of the right-hand side vector operand
2604  , typename ST2 > // Type of the scalar value
2605  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2606  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2607  {
2608  y.subAssign( A * x * scalar );
2609  }
2610  //**********************************************************************************************
2611 
2612  //**Vectorized default subtraction assignment to dense vectors**********************************
2626  template< typename VT1 // Type of the left-hand side target vector
2627  , typename MT1 // Type of the left-hand side matrix operand
2628  , typename VT2 // Type of the right-hand side vector operand
2629  , typename ST2 > // Type of the scalar value
2630  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2631  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2632  {
2633  typedef IntrinsicTrait<ElementType> IT;
2634 
2635  const size_t M( A.rows() );
2636  const size_t N( A.columns() );
2637 
2638  const IntrinsicType factor( set( scalar ) );
2639 
2640  size_t i( 0UL );
2641 
2642  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2643  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2644  for( size_t j=0UL; j<N; ++j ) {
2645  const IntrinsicType x1( set( x[j] ) );
2646  xmm1 = xmm1 + A.get(i ,j) * x1;
2647  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2648  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2649  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2650  xmm5 = xmm5 + A.get(i+IT::size*4UL,j) * x1;
2651  xmm6 = xmm6 + A.get(i+IT::size*5UL,j) * x1;
2652  xmm7 = xmm7 + A.get(i+IT::size*6UL,j) * x1;
2653  xmm8 = xmm8 + A.get(i+IT::size*7UL,j) * x1;
2654  }
2655  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2656  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2657  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2658  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2659  store( &y[i+IT::size*4UL], load( &y[i+IT::size*4UL] ) - xmm5*factor );
2660  store( &y[i+IT::size*5UL], load( &y[i+IT::size*5UL] ) - xmm6*factor );
2661  store( &y[i+IT::size*6UL], load( &y[i+IT::size*6UL] ) - xmm7*factor );
2662  store( &y[i+IT::size*7UL], load( &y[i+IT::size*7UL] ) - xmm8*factor );
2663  }
2664  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2665  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2666  for( size_t j=0UL; j<N; ++j ) {
2667  const IntrinsicType x1( set( x[j] ) );
2668  xmm1 = xmm1 + A.get(i ,j) * x1;
2669  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2670  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2671  xmm4 = xmm4 + A.get(i+IT::size*3UL,j) * x1;
2672  }
2673  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2674  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2675  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2676  store( &y[i+IT::size*3UL], load( &y[i+IT::size*3UL] ) - xmm4*factor );
2677  }
2678  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2679  IntrinsicType xmm1, xmm2, xmm3;
2680  for( size_t j=0UL; j<N; ++j ) {
2681  const IntrinsicType x1( set( x[j] ) );
2682  xmm1 = xmm1 + A.get(i ,j) * x1;
2683  xmm2 = xmm2 + A.get(i+IT::size ,j) * x1;
2684  xmm3 = xmm3 + A.get(i+IT::size*2UL,j) * x1;
2685  }
2686  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2687  store( &y[i+IT::size ], load( &y[i+IT::size ] ) - xmm2*factor );
2688  store( &y[i+IT::size*2UL], load( &y[i+IT::size*2UL] ) - xmm3*factor );
2689  }
2690  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2691  IntrinsicType xmm1, xmm2;
2692  for( size_t j=0UL; j<N; ++j ) {
2693  const IntrinsicType x1( set( x[j] ) );
2694  xmm1 = xmm1 + A.get(i ,j) * x1;
2695  xmm2 = xmm2 + A.get(i+IT::size,j) * x1;
2696  }
2697  store( &y[i ], load( &y[i ] ) - xmm1*factor );
2698  store( &y[i+IT::size], load( &y[i+IT::size] ) - xmm2*factor );
2699  }
2700  if( i < M ) {
2701  IntrinsicType xmm1;
2702  for( size_t j=0UL; j<N; ++j ) {
2703  xmm1 = xmm1 + A.get(i,j) * set( x[j] );
2704  }
2705  store( &y[i], load( &y[i] ) - xmm1*factor );
2706  }
2707  }
2708  //**********************************************************************************************
2709 
2710  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2724  template< typename VT1 // Type of the left-hand side target vector
2725  , typename MT1 // Type of the left-hand side matrix operand
2726  , typename VT2 // Type of the right-hand side vector operand
2727  , typename ST2 > // Type of the scalar value
2728  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2729  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2730  {
2731  selectDefaultSubAssignKernel( y, A, x, scalar );
2732  }
2733  //**********************************************************************************************
2734 
2735  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2736 #if BLAZE_BLAS_MODE
2737 
2750  template< typename VT1 // Type of the left-hand side target vector
2751  , typename MT1 // Type of the left-hand side matrix operand
2752  , typename VT2 // Type of the right-hand side vector operand
2753  , typename ST2 > // Type of the scalar value
2754  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2755  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2756  {
2757  using boost::numeric_cast;
2758 
2759  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2760  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2762 
2763  const int M ( numeric_cast<int>( A.rows() ) );
2764  const int N ( numeric_cast<int>( A.columns() ) );
2765  const int lda( numeric_cast<int>( A.spacing() ) );
2766 
2767  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2768  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2769  }
2770 #endif
2771  //**********************************************************************************************
2772 
2773  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2774 #if BLAZE_BLAS_MODE
2775 
2788  template< typename VT1 // Type of the left-hand side target vector
2789  , typename MT1 // Type of the left-hand side matrix operand
2790  , typename VT2 // Type of the right-hand side vector operand
2791  , typename ST2 > // Type of the scalar value
2792  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2793  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2794  {
2795  using boost::numeric_cast;
2796 
2797  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2800 
2801  const int M ( numeric_cast<int>( A.rows() ) );
2802  const int N ( numeric_cast<int>( A.columns() ) );
2803  const int lda( numeric_cast<int>( A.spacing() ) );
2804 
2805  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2806  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2807  }
2808 #endif
2809  //**********************************************************************************************
2810 
2811  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2812 #if BLAZE_BLAS_MODE
2813 
2826  template< typename VT1 // Type of the left-hand side target vector
2827  , typename MT1 // Type of the left-hand side matrix operand
2828  , typename VT2 // Type of the right-hand side vector operand
2829  , typename ST2 > // Type of the scalar value
2830  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2831  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2832  {
2833  using boost::numeric_cast;
2834 
2835  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2836  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2837  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2838  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2839  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2840  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2841 
2842  const int M ( numeric_cast<int>( A.rows() ) );
2843  const int N ( numeric_cast<int>( A.columns() ) );
2844  const int lda( numeric_cast<int>( A.spacing() ) );
2845  const complex<float> alpha( -scalar );
2846  const complex<float> beta ( 1.0F, 0.0F );
2847 
2848  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2849  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2850  }
2851 #endif
2852  //**********************************************************************************************
2853 
2854  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2855 #if BLAZE_BLAS_MODE
2856 
2869  template< typename VT1 // Type of the left-hand side target vector
2870  , typename MT1 // Type of the left-hand side matrix operand
2871  , typename VT2 // Type of the right-hand side vector operand
2872  , typename ST2 > // Type of the scalar value
2873  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2874  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2875  {
2876  using boost::numeric_cast;
2877 
2878  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2879  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2880  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2881  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2882  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2883  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2884 
2885  const int M ( numeric_cast<int>( A.rows() ) );
2886  const int N ( numeric_cast<int>( A.columns() ) );
2887  const int lda( numeric_cast<int>( A.spacing() ) );
2888  const complex<double> alpha( -scalar );
2889  const complex<double> beta ( 1.0, 0.0 );
2890 
2891  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2892  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2893  }
2894 #endif
2895  //**********************************************************************************************
2896 
2897  //**Subtraction assignment to sparse vectors****************************************************
2898  // No special implementation for the subtraction assignment to sparse vectors.
2899  //**********************************************************************************************
2900 
2901  //**Multiplication assignment to dense vectors**************************************************
2913  template< typename VT1 > // Type of the target dense vector
2914  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2915  {
2917 
2920  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2921 
2922  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2923 
2924  const ResultType tmp( rhs );
2925  multAssign( ~lhs, tmp );
2926  }
2927  //**********************************************************************************************
2928 
2929  //**Multiplication assignment to sparse vectors*************************************************
2930  // No special implementation for the multiplication assignment to sparse vectors.
2931  //**********************************************************************************************
2932 
2933  //**Compile time checks*************************************************************************
2942  //**********************************************************************************************
2943 };
2945 //*************************************************************************************************
2946 
2947 
2948 
2949 
2950 //=================================================================================================
2951 //
2952 // GLOBAL BINARY ARITHMETIC OPERATORS
2953 //
2954 //=================================================================================================
2955 
2956 //*************************************************************************************************
2987 template< typename T1 // Type of the left-hand side dense matrix
2988  , typename T2 > // Type of the right-hand side dense vector
2989 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
2991 {
2993 
2994  if( (~mat).columns() != (~vec).size() )
2995  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2996 
2997  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
2998 }
2999 //*************************************************************************************************
3000 
3001 } // namespace blaze
3002 
3003 #endif