All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
50 #include <blaze/system/BLAS.h>
52 #include <blaze/util/Assert.h>
53 #include <blaze/util/Complex.h>
58 #include <blaze/util/DisableIf.h>
59 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/SelectType.h>
61 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // CLASS TDVECDMATMULTEXPR
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
84 template< typename VT // Type of the left-hand side dense vector
85  , typename MT > // Type of the right-hand side dense matrix
86 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
87  , private Expression
88  , private Computation
89 {
90  private:
91  //**Type definitions****************************************************************************
// Private shorthand typedefs for the nested types of the two operand types VT and MT.
92  typedef typename VT::ResultType VRT;  //!< Result type of the left-hand side dense vector expression.
93  typedef typename MT::ResultType MRT;  //!< Result type of the right-hand side dense matrix expression.
94  typedef typename VRT::ElementType VET;  //!< Element type of the left-hand side result type.
95  typedef typename MRT::ElementType MET;  //!< Element type of the right-hand side result type.
96  typedef typename VT::CompositeType VCT;  //!< Composite type of the left-hand side dense vector expression.
97  typedef typename MT::CompositeType MCT;  //!< Composite type of the right-hand side dense matrix expression.
98  //**********************************************************************************************
99 
100  //**********************************************************************************************
// Compile-time switch 'evaluate'. The assignment functions of this class test it together with
// IsComputation<MT> to decide whether the default kernels must be used for the matrix operand.
// NOTE(review): in this listing the initializer stops after the trailing '&&' -- the continuation
// of the condition and the closing '};' are missing and must be restored from the original header.
102  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
104  //**********************************************************************************************
105 
106  //**********************************************************************************************
108 
109 
112  template< typename T1, typename T2, typename T3 >
113  struct UseSinglePrecisionKernel {
117  };
119  //**********************************************************************************************
120 
121  //**********************************************************************************************
123 
124 
127  template< typename T1, typename T2, typename T3 >
128  struct UseDoublePrecisionKernel {
129  enum { value = IsDouble<typename T1::ElementType>::value &&
130  IsDouble<typename T2::ElementType>::value &&
131  IsDouble<typename T3::ElementType>::value };
132  };
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
138 
139 
142  template< typename T1, typename T2, typename T3 >
143  struct UseSinglePrecisionComplexKernel {
144  typedef complex<float> Type;
145  enum { value = IsSame<typename T1::ElementType,Type>::value &&
146  IsSame<typename T2::ElementType,Type>::value &&
147  IsSame<typename T3::ElementType,Type>::value };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
155 
158  template< typename T1, typename T2, typename T3 >
159  struct UseDoublePrecisionComplexKernel {
160  typedef complex<double> Type;
161  enum { value = IsSame<typename T1::ElementType,Type>::value &&
162  IsSame<typename T2::ElementType,Type>::value &&
163  IsSame<typename T3::ElementType,Type>::value };
164  };
166  //**********************************************************************************************
167 
168  //**********************************************************************************************
170 
171 
173  template< typename T1, typename T2, typename T3 >
174  struct UseDefaultKernel {
175  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
176  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
177  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
178  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
179  };
181  //**********************************************************************************************
182 
183  //**********************************************************************************************
185 
186 
189  template< typename T1, typename T2, typename T3 >
190  struct UseVectorizedDefaultKernel {
191  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
192  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
193  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
194  IntrinsicTrait<typename T1::ElementType>::addition &&
195  IntrinsicTrait<typename T1::ElementType>::multiplication };
196  };
198  //**********************************************************************************************
199 
200  public:
201  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): 'ResultType' is used below but its typedef is not part of this listing; likewise
// the typedef 'RT' (used next to LT in assign()/addAssign()) is not visible here. Both need to be
// restored from the original header.
204  typedef typename ResultType::TransposeType TransposeType;  //!< Transpose type for expression template evaluations.
205  typedef typename ResultType::ElementType ElementType;  //!< Resulting element type.
207  typedef const ElementType ReturnType;  //!< Return type of the subscript operator (elements are returned by value).
208  typedef const ResultType CompositeType;  //!< Data type for composite expression templates.
209 

// Storage type of the left-hand side operand: a copy if VT is an expression, a reference otherwise.
211  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
212 

// Storage type of the right-hand side operand: a copy if MT is an expression, a reference otherwise.
214  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
215 

// Type used to evaluate the left-hand side operand inside the assignment functions:
// the result type VRT if VT is a computation, the composite type VCT otherwise.
217  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
218 
221  //**********************************************************************************************
222 
223  //**Compilation flags***************************************************************************
225  enum { vectorizable = 0 };
226 
228  enum { canAlias = ( !IsComputation<VT>::value ) ||
229  ( !evaluate && IsComputation<MT>::value &&
231  //**********************************************************************************************
232 
233  //**Constructor*********************************************************************************
// Constructor for the TDVecDMatMultExpr class.
//
// vec: the left-hand side vector operand of the multiplication expression.
// mat: the right-hand side matrix operand of the multiplication expression.
//
// Besides storing the two operands, the constructor precomputes end_, the upper bound of the
// two-way unrolled loop in operator[]: ((rows-1) rounded down to an even number) + 1, i.e. an
// odd index. For an empty matrix (rows == 0) the unsigned subtraction wraps around; operator[]
// never uses end_ in that case because it checks mat_.rows() != 0 first.
239  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
240  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
241  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
242  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
243  {
// The vector must provide exactly one element per matrix row.
244  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
245  }
246  //**********************************************************************************************
247 
248  //**Subscript operator**************************************************************************
254  inline ReturnType operator[]( size_t index ) const {
255  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
256 
257  ElementType res;
258 
259  if( mat_.rows() != 0UL ) {
260  res = vec_[0UL] * mat_(0UL,index);
261  for( size_t j=1UL; j<end_; j+=2UL ) {
262  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
263  }
264  if( end_ < mat_.rows() ) {
265  res += vec_[end_] * mat_(end_,index);
266  }
267  }
268  else {
269  reset( res );
270  }
271 
272  return res;
273  }
274  //**********************************************************************************************
275 
276  //**Size function*******************************************************************************
281  inline size_t size() const {
282  return mat_.columns();
283  }
284  //**********************************************************************************************
285 
286  //**Left operand access*************************************************************************
291  inline LeftOperand leftOperand() const {
292  return vec_;
293  }
294  //**********************************************************************************************
295 
296  //**Right operand access************************************************************************
301  inline RightOperand rightOperand() const {
302  return mat_;
303  }
304  //**********************************************************************************************
305 
306  //**********************************************************************************************
312  template< typename T >
313  inline bool isAliased( const T* alias ) const {
314  return ( !IsComputation<VT>::value && vec_.isAliased( alias ) ) ||
316  CanAlias<MT>::value && mat_.isAliased( alias ) );
317  }
318  //**********************************************************************************************
319 
320  private:
321  //**Member variables****************************************************************************
// End of the unrolled calculation loop used by operator[]; initialized in the constructor.
// NOTE(review): the declarations of the operand members vec_ and mat_ (initialized before end_
// in the constructor) are not part of this listing.
324  const size_t end_;
325  //**********************************************************************************************
326 
327  //**Assignment to dense vectors*****************************************************************
340  template< typename VT1 > // Type of the target dense vector
341  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
342  {
343  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
344 
345  if( rhs.mat_.rows() == 0UL ) {
346  reset( ~lhs );
347  return;
348  }
349  else if( rhs.mat_.columns() == 0UL ) {
350  return;
351  }
352 
353  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
354  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
355 
356  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
357  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
358  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
359  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
360 
361  if( ( IsComputation<MT>::value && !evaluate ) ||
362  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
363  TDVecDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
364  else
365  TDVecDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
366  }
368  //**********************************************************************************************
369 
370  //**Default assignment to dense vectors*********************************************************
384  template< typename VT1 // Type of the left-hand side target vector
385  , typename VT2 // Type of the left-hand side vector operand
386  , typename MT1 > // Type of the right-hand side matrix operand
387  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
388  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
389  {
390  const size_t M( A.rows() );
391  const size_t N( A.columns() );
392 
393  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
394  const size_t jend( N & size_t(-2) );
395 
396  for( size_t j=0UL; j<N; ++j ) {
397  y[j] = x[0UL] * A(0UL,j);
398  }
399  for( size_t i=1UL; i<M; ++i ) {
400  for( size_t j=0UL; j<jend; j+=2UL ) {
401  y[j ] += x[i] * A(i,j );
402  y[j+1UL] += x[i] * A(i,j+1UL);
403  }
404  if( jend < N ) {
405  y[jend] += x[i] * A(i,jend);
406  }
407  }
408  }
410  //**********************************************************************************************
411 
412  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment of a transpose dense vector-dense matrix multiplication
// (y^T = x^T * A).
//
// y: the target left-hand side dense vector.
// x: the left-hand side dense vector operand.
// A: the right-hand side dense matrix operand.
//
// Selected via EnableIf when UseVectorizedDefaultKernel holds for the three types. The columns
// are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size elements;
// for each block the products x[i]*A(i,.) are accumulated over all rows i and then stored to y.
// NOTE(review): the accumulators are only default-constructed before being summed into -- this
// presumably relies on IntrinsicType default construction yielding zero; confirm.
// NOTE(review): the column bound is A.spacing(), not A.columns(), so y is read/written up to the
// spacing of A -- presumably y is padded accordingly; confirm against the vector types.
426  template< typename VT1 // Type of the left-hand side target vector
427  , typename VT2 // Type of the left-hand side vector operand
428  , typename MT1 > // Type of the right-hand side matrix operand
429  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
430  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
431  {
432  typedef IntrinsicTrait<ElementType> IT;
433 
434  const size_t M( A.rows() );
435  const size_t N( A.spacing() );
436 
437  size_t j( 0UL );
438 
// Blocks of eight intrinsic vectors
439  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
440  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
441  for( size_t i=0UL; i<M; ++i ) {
// x[i] is broadcast once per row and reused for all eight partial sums
442  const IntrinsicType x1( set( x[i] ) );
443  xmm1 = xmm1 + x1 * A.get(i,j );
444  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
445  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
446  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
447  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
448  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
449  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
450  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
451  }
452  store( &y[j ], xmm1 );
453  store( &y[j+IT::size ], xmm2 );
454  store( &y[j+IT::size*2UL], xmm3 );
455  store( &y[j+IT::size*3UL], xmm4 );
456  store( &y[j+IT::size*4UL], xmm5 );
457  store( &y[j+IT::size*5UL], xmm6 );
458  store( &y[j+IT::size*6UL], xmm7 );
459  store( &y[j+IT::size*7UL], xmm8 );
460  }
// Blocks of four intrinsic vectors
461  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
462  IntrinsicType xmm1, xmm2, xmm3, xmm4;
463  for( size_t i=0UL; i<M; ++i ) {
464  const IntrinsicType x1( set( x[i] ) );
465  xmm1 = xmm1 + x1 * A.get(i,j );
466  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
467  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
468  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
469  }
470  store( &y[j ], xmm1 );
471  store( &y[j+IT::size ], xmm2 );
472  store( &y[j+IT::size*2UL], xmm3 );
473  store( &y[j+IT::size*3UL], xmm4 );
474  }
// Blocks of three intrinsic vectors
475  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
476  IntrinsicType xmm1, xmm2, xmm3;
477  for( size_t i=0UL; i<M; ++i ) {
478  const IntrinsicType x1( set( x[i] ) );
479  xmm1 = xmm1 + x1 * A.get(i,j );
480  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
481  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
482  }
483  store( &y[j ], xmm1 );
484  store( &y[j+IT::size ], xmm2 );
485  store( &y[j+IT::size*2UL], xmm3 );
486  }
// Blocks of two intrinsic vectors
487  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
488  IntrinsicType xmm1, xmm2;
489  for( size_t i=0UL; i<M; ++i ) {
490  const IntrinsicType x1( set( x[i] ) );
491  xmm1 = xmm1 + x1 * A.get(i,j );
492  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
493  }
494  store( &y[j ], xmm1 );
495  store( &y[j+IT::size], xmm2 );
496  }
// At most one intrinsic vector is handled here -- assumes N is a multiple of IT::size; TODO confirm
497  if( j < N ) {
498  IntrinsicType xmm1;
499  for( size_t i=0UL; i<M; ++i ) {
500  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
501  }
502  store( &y[j], xmm1 );
503  }
504  }
506  //**********************************************************************************************
507 
508  //**BLAS-based assignment to dense vectors (default)********************************************
522  template< typename VT1 // Type of the left-hand side target vector
523  , typename VT2 // Type of the left-hand side vector operand
524  , typename MT1 > // Type of the right-hand side matrix operand
525  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
526  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
527  {
528  selectDefaultAssignKernel( y, x, A );
529  }
531  //**********************************************************************************************
532 
533  //**BLAS-based assignment to dense vectors (single precision)***********************************
534 #if BLAZE_BLAS_MODE
535 
548  template< typename VT1 // Type of the left-hand side target vector
549  , typename VT2 // Type of the left-hand side vector operand
550  , typename MT1 > // Type of the right-hand side matrix operand
551  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
552  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
553  {
554  using boost::numeric_cast;
555 
556  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
557  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
558  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
559 
560  const int M ( numeric_cast<int>( A.rows() ) );
561  const int N ( numeric_cast<int>( A.columns() ) );
562  const int lda( numeric_cast<int>( A.spacing() ) );
563 
564  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
565  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
566  }
568 #endif
569  //**********************************************************************************************
570 
571  //**BLAS-based assignment to dense vectors (double precision)***********************************
572 #if BLAZE_BLAS_MODE
573 
586  template< typename VT1 // Type of the left-hand side target vector
587  , typename VT2 // Type of the left-hand side vector operand
588  , typename MT1 > // Type of the right-hand side matrix operand
589  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
590  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
591  {
592  using boost::numeric_cast;
593 
594  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
595  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
596  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
597 
598  const int M ( numeric_cast<int>( A.rows() ) );
599  const int N ( numeric_cast<int>( A.columns() ) );
600  const int lda( numeric_cast<int>( A.spacing() ) );
601 
602  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
603  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
604  }
606 #endif
607  //**********************************************************************************************
608 
609  //**BLAS-based assignment to dense vectors (single precision complex)***************************
610 #if BLAZE_BLAS_MODE
611 
624  template< typename VT1 // Type of the left-hand side target vector
625  , typename VT2 // Type of the left-hand side vector operand
626  , typename MT1 > // Type of the right-hand side matrix operand
627  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
628  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
629  {
630  using boost::numeric_cast;
631 
632  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
633  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
634  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
635  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
636  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
637  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
638 
639  const int M ( numeric_cast<int>( A.rows() ) );
640  const int N ( numeric_cast<int>( A.columns() ) );
641  const int lda( numeric_cast<int>( A.spacing() ) );
642  const complex<float> alpha( 1.0F, 0.0F );
643  const complex<float> beta ( 0.0F, 0.0F );
644 
645  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
646  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
647  }
649 #endif
650  //**********************************************************************************************
651 
652  //**BLAS-based assignment to dense vectors (double precision complex)***************************
653 #if BLAZE_BLAS_MODE
654 
667  template< typename VT1 // Type of the left-hand side target vector
668  , typename VT2 // Type of the left-hand side vector operand
669  , typename MT1 > // Type of the right-hand side matrix operand
670  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
671  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
672  {
673  using boost::numeric_cast;
674 
675  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
676  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
677  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
678  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
679  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
680  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
681 
682  const int M ( numeric_cast<int>( A.rows() ) );
683  const int N ( numeric_cast<int>( A.columns() ) );
684  const int lda( numeric_cast<int>( A.spacing() ) );
685  const complex<double> alpha( 1.0, 0.0 );
686  const complex<double> beta ( 0.0, 0.0 );
687 
688  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
689  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
690  }
692 #endif
693  //**********************************************************************************************
694 
695  //**Assignment to sparse vectors****************************************************************
708  template< typename VT1 > // Type of the target sparse vector
709  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
710  {
713  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
714 
715  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
716 
717  const ResultType tmp( rhs );
718  assign( ~lhs, tmp );
719  }
721  //**********************************************************************************************
722 
723  //**Addition assignment to dense vectors********************************************************
736  template< typename VT1 > // Type of the target dense vector
737  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
738  {
739  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
740 
741  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
742  return;
743  }
744 
745  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
746  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
747 
748  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
749  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
750  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
751  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
752 
753  if( ( IsComputation<MT>::value && !evaluate ) ||
754  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
755  TDVecDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
756  else
757  TDVecDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
758  }
760  //**********************************************************************************************
761 
762  //**Default addition assignment to dense vectors************************************************
776  template< typename VT1 // Type of the left-hand side target vector
777  , typename VT2 // Type of the left-hand side vector operand
778  , typename MT1 > // Type of the right-hand side matrix operand
779  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
780  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
781  {
782  const size_t M( A.rows() );
783  const size_t N( A.columns() );
784 
785  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
786  const size_t jend( N & size_t(-2) );
787 
788  for( size_t i=0UL; i<M; ++i ) {
789  for( size_t j=0UL; j<jend; j+=2UL ) {
790  y[j ] += x[i] * A(i,j );
791  y[j+1UL] += x[i] * A(i,j+1UL);
792  }
793  if( jend < N ) {
794  y[jend] += x[i] * A(i,jend);
795  }
796  }
797  }
799  //**********************************************************************************************
800 
801  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment of a transpose dense vector-dense matrix multiplication
// (y^T += x^T * A).
//
// y: the target left-hand side dense vector.
// x: the left-hand side dense vector operand.
// A: the right-hand side dense matrix operand.
//
// Selected via EnableIf when UseVectorizedDefaultKernel holds for the three types. The columns
// are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size elements;
// for each block the current values of y are loaded, the products x[i]*A(i,.) are accumulated
// over all rows i, and the sums are stored back to y.
// NOTE(review): the column bound is A.spacing(), not A.columns(), so y is read/written up to the
// spacing of A -- presumably y is padded accordingly; confirm against the vector types.
815  template< typename VT1 // Type of the left-hand side target vector
816  , typename VT2 // Type of the left-hand side vector operand
817  , typename MT1 > // Type of the right-hand side matrix operand
818  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
819  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
820  {
821  typedef IntrinsicTrait<ElementType> IT;
822 
823  const size_t M( A.rows() );
824  const size_t N( A.spacing() );
825 
826  size_t j( 0UL );
827 
// Blocks of eight intrinsic vectors; the accumulators start from the current target values
828  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
829  IntrinsicType xmm1( load( &y[j ] ) );
830  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
831  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
832  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
833  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
834  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
835  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
836  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
837  for( size_t i=0UL; i<M; ++i ) {
// x[i] is broadcast once per row and reused for all eight partial sums
838  const IntrinsicType x1( set( x[i] ) );
839  xmm1 = xmm1 + x1 * A.get(i,j );
840  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
841  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
842  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
843  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
844  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
845  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
846  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
847  }
848  store( &y[j ], xmm1 );
849  store( &y[j+IT::size ], xmm2 );
850  store( &y[j+IT::size*2UL], xmm3 );
851  store( &y[j+IT::size*3UL], xmm4 );
852  store( &y[j+IT::size*4UL], xmm5 );
853  store( &y[j+IT::size*5UL], xmm6 );
854  store( &y[j+IT::size*6UL], xmm7 );
855  store( &y[j+IT::size*7UL], xmm8 );
856  }
// Blocks of four intrinsic vectors
857  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
858  IntrinsicType xmm1( load( &y[j ] ) );
859  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
860  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
861  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
862  for( size_t i=0UL; i<M; ++i ) {
863  const IntrinsicType x1( set( x[i] ) );
864  xmm1 = xmm1 + x1 * A.get(i,j );
865  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
866  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
867  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
868  }
869  store( &y[j ], xmm1 );
870  store( &y[j+IT::size ], xmm2 );
871  store( &y[j+IT::size*2UL], xmm3 );
872  store( &y[j+IT::size*3UL], xmm4 );
873  }
// Blocks of three intrinsic vectors
874  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
875  IntrinsicType xmm1( load( &y[j ] ) );
876  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
877  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
878  for( size_t i=0UL; i<M; ++i ) {
879  const IntrinsicType x1( set( x[i] ) );
880  xmm1 = xmm1 + x1 * A.get(i,j );
881  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
882  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
883  }
884  store( &y[j ], xmm1 );
885  store( &y[j+IT::size ], xmm2 );
886  store( &y[j+IT::size*2UL], xmm3 );
887  }
// Blocks of two intrinsic vectors
888  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
889  IntrinsicType xmm1( load( &y[j ] ) );
890  IntrinsicType xmm2( load( &y[j+IT::size] ) );
891  for( size_t i=0UL; i<M; ++i ) {
892  const IntrinsicType x1( set( x[i] ) );
893  xmm1 = xmm1 + x1 * A.get(i,j );
894  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
895  }
896  store( &y[j ], xmm1 );
897  store( &y[j+IT::size], xmm2 );
898  }
// At most one intrinsic vector is handled here -- assumes N is a multiple of IT::size; TODO confirm
899  if( j < N ) {
900  IntrinsicType xmm1( load( &y[j] ) );
901  for( size_t i=0UL; i<M; ++i ) {
902  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
903  }
904  store( &y[j], xmm1 );
905  }
906  }
908  //**********************************************************************************************
909 
910  //**BLAS-based addition assignment to dense vectors (default)***********************************
924  template< typename VT1 // Type of the left-hand side target vector
925  , typename VT2 // Type of the left-hand side vector operand
926  , typename MT1 > // Type of the right-hand side matrix operand
927  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
928  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
929  {
930  selectDefaultAddAssignKernel( y, x, A );
931  }
933  //**********************************************************************************************
934 
935  //**BLAS-based addition assignment to dense vectors (single precision)**************************
936 #if BLAZE_BLAS_MODE
937 
950  template< typename VT1 // Type of the left-hand side target vector
951  , typename VT2 // Type of the left-hand side vector operand
952  , typename MT1 > // Type of the right-hand side matrix operand
953  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
954  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
955  {
956  using boost::numeric_cast;
957 
958  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
959  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
960  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
961 
962  const int M ( numeric_cast<int>( A.rows() ) );
963  const int N ( numeric_cast<int>( A.columns() ) );
964  const int lda( numeric_cast<int>( A.spacing() ) );
965 
966  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
967  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
968  }
970 #endif
971  //**********************************************************************************************
972 
973  //**BLAS-based addition assignment to dense vectors (double precision)**************************
974 #if BLAZE_BLAS_MODE
975 
988  template< typename VT1 // Type of the left-hand side target vector
989  , typename VT2 // Type of the left-hand side vector operand
990  , typename MT1 > // Type of the right-hand side matrix operand
991  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
992  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
993  {
994  using boost::numeric_cast;
995 
996  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
997  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
998  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
999 
1000  const int M ( numeric_cast<int>( A.rows() ) );
1001  const int N ( numeric_cast<int>( A.columns() ) );
1002  const int lda( numeric_cast<int>( A.spacing() ) );
1003 
1004  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1005  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1006  }
1008 #endif
1009  //**********************************************************************************************
1010 
1011  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1012 #if BLAZE_BLAS_MODE
1013 
// BLAS kernel for the addition assignment y += x * A with complex<float> elements.
// This overload is enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_cgemv with alpha = (1,0) and beta = (1,0).
1026  template< typename VT1 // Type of the left-hand side target vector
1027  , typename VT2 // Type of the left-hand side vector operand
1028  , typename MT1 > // Type of the right-hand side matrix operand
1029  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1030  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1031  {
1032  using boost::numeric_cast;
1033 
1034  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1035  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1036  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1037  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1038  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1039  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1040 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1041  const int M ( numeric_cast<int>( A.rows() ) );
1042  const int N ( numeric_cast<int>( A.columns() ) );
1043  const int lda( numeric_cast<int>( A.spacing() ) );
1044  const complex<float> alpha( 1.0F, 0.0F );
1045  const complex<float> beta ( 1.0F, 0.0F );
1046 
// A is handed over as row-major and transposed, so the call forms A^T * x and adds it to y.
1047  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1048  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1049  }
1051 #endif
1052  //**********************************************************************************************
1053 
1054  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1055 #if BLAZE_BLAS_MODE
1056 
// BLAS kernel for the addition assignment y += x * A with complex<double> elements.
// This overload is enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_zgemv with alpha = (1,0) and beta = (1,0).
1069  template< typename VT1 // Type of the left-hand side target vector
1070  , typename VT2 // Type of the left-hand side vector operand
1071  , typename MT1 > // Type of the right-hand side matrix operand
1072  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1073  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1074  {
1075  using boost::numeric_cast;
1076 
1077  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1078  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1079  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1080  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1081  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1082  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1083 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1084  const int M ( numeric_cast<int>( A.rows() ) );
1085  const int N ( numeric_cast<int>( A.columns() ) );
1086  const int lda( numeric_cast<int>( A.spacing() ) );
1087  const complex<double> alpha( 1.0, 0.0 );
1088  const complex<double> beta ( 1.0, 0.0 );
1089 
// A is handed over as row-major and transposed, so the call forms A^T * x and adds it to y.
1090  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1091  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1092  }
1094 #endif
1095  //**********************************************************************************************
1096 
1097  //**Addition assignment to sparse vectors*******************************************************
1098  // No special implementation for the addition assignment to sparse vectors.
1099  //**********************************************************************************************
1100 
1101  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of a transpose dense vector-dense matrix multiplication
// to a transpose dense vector (lhs -= vec * mat).
//   lhs : target dense vector
//   rhs : multiplication expression to be subtracted
// Nothing is done when the matrix operand has no rows or no columns. Otherwise both
// operands are bound to (or evaluated into) LT/RT and the work is dispatched to either
// the default kernel or the BLAS kernel.
1114  template< typename VT1 > // Type of the target dense vector
1115  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1116  {
1117  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1118 
// An empty product contributes nothing, so lhs is left untouched.
1119  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1120  return;
1121  }
1122 
1123  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1124  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1125 
1126  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1127  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1128  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1129  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1130 
// The default kernel is used when the matrix is an unevaluated computation or when the
// element count of A is below TDVECDMATMULT_THRESHOLD; otherwise the BLAS kernel is used.
1131  if( ( IsComputation<MT>::value && !evaluate ) ||
1132  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1133  TDVecDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1134  else
1135  TDVecDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1136  }
1138  //**********************************************************************************************
1139 
1140  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) kernel for the subtraction assignment y -= x * A.
// This overload is active when UseVectorizedDefaultKernel<VT1,VT2,MT1> does not hold.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// For every row i of A the scaled row x[i] * A(i,:) is subtracted from y.
1154  template< typename VT1 // Type of the left-hand side target vector
1155  , typename VT2 // Type of the left-hand side vector operand
1156  , typename MT1 > // Type of the right-hand side matrix operand
1157  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1158  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1159  {
1160  const size_t M( A.rows() );
1161  const size_t N( A.columns() );
1162 
// jend is N rounded down to an even number; it bounds the two-way unrolled column loop.
1163  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1164  const size_t jend( N & size_t(-2) );
1165 
1166  for( size_t i=0UL; i<M; ++i ) {
1167  for( size_t j=0UL; j<jend; j+=2UL ) {
1168  y[j ] -= x[i] * A(i,j );
1169  y[j+1UL] -= x[i] * A(i,j+1UL);
1170  }
// Remaining last column in case N is odd.
1171  if( jend < N ) {
1172  y[jend] -= x[i] * A(i,jend);
1173  }
1174  }
1175  }
1177  //**********************************************************************************************
1178 
1179  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment y -= x * A.
// This overload is active when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The columns are processed in groups of 8, 4, 3, 2 and finally 1 intrinsic vector(s).
// Each group loads the current values of y, subtracts x[i] * A(i,.) over all rows i and
// stores the result back.
// NOTE(review): the column bound is A.spacing(), not A.columns(); this presumably relies on
// y and the rows of A being padded up to the spacing - confirm against the container types.
1193  template< typename VT1 // Type of the left-hand side target vector
1194  , typename VT2 // Type of the left-hand side vector operand
1195  , typename MT1 > // Type of the right-hand side matrix operand
1196  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1197  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1198  {
1199  typedef IntrinsicTrait<ElementType> IT;
1200 
1201  const size_t M( A.rows() );
1202  const size_t N( A.spacing() );
1203 
1204  size_t j( 0UL );
1205 
// Groups of eight intrinsic vectors.
1206  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1207  IntrinsicType xmm1( load( &y[j ] ) );
1208  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1209  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1210  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1211  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
1212  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
1213  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
1214  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
1215  for( size_t i=0UL; i<M; ++i ) {
1216  const IntrinsicType x1( set( x[i] ) );
1217  xmm1 = xmm1 - x1 * A.get(i,j );
1218  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1219  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1220  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1221  xmm5 = xmm5 - x1 * A.get(i,j+IT::size*4UL);
1222  xmm6 = xmm6 - x1 * A.get(i,j+IT::size*5UL);
1223  xmm7 = xmm7 - x1 * A.get(i,j+IT::size*6UL);
1224  xmm8 = xmm8 - x1 * A.get(i,j+IT::size*7UL);
1225  }
1226  store( &y[j ], xmm1 );
1227  store( &y[j+IT::size ], xmm2 );
1228  store( &y[j+IT::size*2UL], xmm3 );
1229  store( &y[j+IT::size*3UL], xmm4 );
1230  store( &y[j+IT::size*4UL], xmm5 );
1231  store( &y[j+IT::size*5UL], xmm6 );
1232  store( &y[j+IT::size*6UL], xmm7 );
1233  store( &y[j+IT::size*7UL], xmm8 );
1234  }
// Groups of four intrinsic vectors.
1235  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1236  IntrinsicType xmm1( load( &y[j ] ) );
1237  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1238  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1239  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1240  for( size_t i=0UL; i<M; ++i ) {
1241  const IntrinsicType x1( set( x[i] ) );
1242  xmm1 = xmm1 - x1 * A.get(i,j );
1243  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1244  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1245  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1246  }
1247  store( &y[j ], xmm1 );
1248  store( &y[j+IT::size ], xmm2 );
1249  store( &y[j+IT::size*2UL], xmm3 );
1250  store( &y[j+IT::size*3UL], xmm4 );
1251  }
// Groups of three intrinsic vectors.
1252  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
1253  IntrinsicType xmm1( load( &y[j ] ) );
1254  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1255  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1256  for( size_t i=0UL; i<M; ++i ) {
1257  const IntrinsicType x1( set( x[i] ) );
1258  xmm1 = xmm1 - x1 * A.get(i,j );
1259  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1260  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1261  }
1262  store( &y[j ], xmm1 );
1263  store( &y[j+IT::size ], xmm2 );
1264  store( &y[j+IT::size*2UL], xmm3 );
1265  }
// Groups of two intrinsic vectors.
1266  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1267  IntrinsicType xmm1( load( &y[j ] ) );
1268  IntrinsicType xmm2( load( &y[j+IT::size] ) );
1269  for( size_t i=0UL; i<M; ++i ) {
1270  const IntrinsicType x1( set( x[i] ) );
1271  xmm1 = xmm1 - x1 * A.get(i,j );
1272  xmm2 = xmm2 - x1 * A.get(i,j+IT::size);
1273  }
1274  store( &y[j ], xmm1 );
1275  store( &y[j+IT::size], xmm2 );
1276  }
// At most one intrinsic vector is handled here.
// NOTE(review): assumes the remaining width N - j is exactly IT::size - confirm.
1277  if( j < N ) {
1278  IntrinsicType xmm1( load( &y[j] ) );
1279  for( size_t i=0UL; i<M; ++i ) {
1280  xmm1 = xmm1 - set( x[i] ) * A.get(i,j);
1281  }
1282  store( &y[j], xmm1 );
1283  }
1284  }
1286  //**********************************************************************************************
1287 
1288  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback of the BLAS-based subtraction assignment y -= x * A.
// This overload is enabled when UseDefaultKernel<VT1,VT2,MT1> holds and simply forwards
// to selectDefaultSubAssignKernel().
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
1302  template< typename VT1 // Type of the left-hand side target vector
1303  , typename VT2 // Type of the left-hand side vector operand
1304  , typename MT1 > // Type of the right-hand side matrix operand
1305  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1306  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1307  {
1308  selectDefaultSubAssignKernel( y, x, A );
1309  }
1311  //**********************************************************************************************
1312 
1313  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1314 #if BLAZE_BLAS_MODE
1315 
// BLAS kernel for the subtraction assignment y -= x * A with float elements.
// This overload is enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_sgemv with alpha = -1 and beta = 1.
1328  template< typename VT1 // Type of the left-hand side target vector
1329  , typename VT2 // Type of the left-hand side vector operand
1330  , typename MT1 > // Type of the right-hand side matrix operand
1331  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1332  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1333  {
1334  using boost::numeric_cast;
1335 
1336  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1337  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1338  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1339 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1340  const int M ( numeric_cast<int>( A.rows() ) );
1341  const int N ( numeric_cast<int>( A.columns() ) );
1342  const int lda( numeric_cast<int>( A.spacing() ) );
1343 
// alpha = -1 negates the product A^T * x, beta = 1 keeps the current content of y.
1344  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1345  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1346  }
1348 #endif
1349  //**********************************************************************************************
1350 
1351  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1352 #if BLAZE_BLAS_MODE
1353 
// BLAS kernel for the subtraction assignment y -= x * A with double elements.
// This overload is enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_dgemv with alpha = -1 and beta = 1.
1366  template< typename VT1 // Type of the left-hand side target vector
1367  , typename VT2 // Type of the left-hand side vector operand
1368  , typename MT1 > // Type of the right-hand side matrix operand
1369  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1370  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1371  {
1372  using boost::numeric_cast;
1373 
1374  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1375  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1376  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1377 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1378  const int M ( numeric_cast<int>( A.rows() ) );
1379  const int N ( numeric_cast<int>( A.columns() ) );
1380  const int lda( numeric_cast<int>( A.spacing() ) );
1381 
// alpha = -1 negates the product A^T * x, beta = 1 keeps the current content of y.
1382  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1383  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1384  }
1386 #endif
1387  //**********************************************************************************************
1388 
1389  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1390 #if BLAZE_BLAS_MODE
1391 
// BLAS kernel for the subtraction assignment y -= x * A with complex<float> elements.
// This overload is enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_cgemv with alpha = (-1,0) and beta = (1,0).
1404  template< typename VT1 // Type of the left-hand side target vector
1405  , typename VT2 // Type of the left-hand side vector operand
1406  , typename MT1 > // Type of the right-hand side matrix operand
1407  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1408  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1409  {
1410  using boost::numeric_cast;
1411 
1412  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1413  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1414  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1415  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1416  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1417  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1418 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1419  const int M ( numeric_cast<int>( A.rows() ) );
1420  const int N ( numeric_cast<int>( A.columns() ) );
1421  const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = (-1,0) negates the product, beta = (1,0) keeps the current content of y.
1422  const complex<float> alpha( -1.0F, 0.0F );
1423  const complex<float> beta ( 1.0F, 0.0F );
1424 
1425  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1426  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1427  }
1429 #endif
1430  //**********************************************************************************************
1431 
1432  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1433 #if BLAZE_BLAS_MODE
1434 
// BLAS kernel for the subtraction assignment y -= x * A with complex<double> elements.
// This overload is enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds.
//   y : target vector, updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
// The product is delegated to cblas_zgemv with alpha = (-1,0) and beta = (1,0).
1447  template< typename VT1 // Type of the left-hand side target vector
1448  , typename VT2 // Type of the left-hand side vector operand
1449  , typename MT1 > // Type of the right-hand side matrix operand
1450  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1451  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1452  {
1453  using boost::numeric_cast;
1454 
1455  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1456  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1457  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1458  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1459  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1460  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1461 
// The CBLAS interface takes int dimensions, hence the checked size_t -> int conversions.
1462  const int M ( numeric_cast<int>( A.rows() ) );
1463  const int N ( numeric_cast<int>( A.columns() ) );
1464  const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = (-1,0) negates the product, beta = (1,0) keeps the current content of y.
1465  const complex<double> alpha( -1.0, 0.0 );
1466  const complex<double> beta ( 1.0, 0.0 );
1467 
1468  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1469  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1470  }
1472 #endif
1473  //**********************************************************************************************
1474 
1475  //**Subtraction assignment to sparse vectors****************************************************
1476  // No special implementation for the subtraction assignment to sparse vectors.
1477  //**********************************************************************************************
1478 
1479  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a transpose dense vector-dense matrix multiplication
// to a transpose dense vector (lhs *= vec * mat).
//   lhs : target dense vector
//   rhs : multiplication expression acting as right-hand side multiplicand
// The expression is first evaluated into a temporary of type ResultType; the temporary
// is then passed on to the multAssign() overload for that type.
1492  template< typename VT1 > // Type of the target dense vector
1493  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1494  {
1497  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1498 
1499  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1500 
1501  const ResultType tmp( rhs );
1502  multAssign( ~lhs, tmp );
1503  }
1505  //**********************************************************************************************
1506 
1507  //**Multiplication assignment to sparse vectors*******************************************************
1508  // No special implementation for the multiplication assignment to sparse vectors.
1509  //**********************************************************************************************
1510 
1511  //**Compile time checks*************************************************************************
1518  //**********************************************************************************************
1519 };
1520 //*************************************************************************************************
1521 
1522 
1523 
1524 
1525 //=================================================================================================
1526 //
1527 // DVECSCALARMULTEXPR SPECIALIZATION
1528 //
1529 //=================================================================================================
1530 
1531 //*************************************************************************************************
1539 template< typename VT // Type of the left-hand side dense vector
1540  , typename MT // Type of the right-hand side dense matrix
1541  , typename ST > // Type of the side scalar value
1542 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1543  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1544  , private Expression
1545  , private Computation
1546 {
1547  private:
1548  //**Type definitions****************************************************************************
// Shorthand names for the types derived from the wrapped vector/matrix multiplication.
1549  typedef TDVecDMatMultExpr<VT,MT> VMM; // Type of the wrapped vector/matrix multiplication expression
1550  typedef typename VMM::ResultType RES; // Result type of the wrapped multiplication expression
1551  typedef typename VT::ResultType VRT; // Result type of the dense vector operand
1552  typedef typename MT::ResultType MRT; // Result type of the dense matrix operand
1553  typedef typename VRT::ElementType VET; // Element type of the vector result type
1554  typedef typename MRT::ElementType MET; // Element type of the matrix result type
1555  typedef typename VT::CompositeType VCT; // Composite type of the dense vector operand
1556  typedef typename MT::CompositeType MCT; // Composite type of the dense matrix operand
1557  //**********************************************************************************************
1558 
1559  //**********************************************************************************************
// Compile-time switch that decides whether the matrix operand is evaluated into a temporary
// (RT becomes const MRT when it is set). It is set when MT is a computation that is not
// vectorizable and vector and matrix share one BLAS-compatible element type.
1561  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1562  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1563  //**********************************************************************************************
1564 
1565  //**********************************************************************************************
1567 
// Selection helper for the single precision BLAS kernel.
// value is non-zero when the element types of T1, T2 and T3 are all float and the
// scalar type T4 is not a complex type.
1570  template< typename T1, typename T2, typename T3, typename T4 >
1571  struct UseSinglePrecisionKernel {
1572  enum { value = IsFloat<typename T1::ElementType>::value &&
1573  IsFloat<typename T2::ElementType>::value &&
1574  IsFloat<typename T3::ElementType>::value &&
1575  !IsComplex<T4>::value };
1576  };
1577  //**********************************************************************************************
1578 
1579  //**********************************************************************************************
1581 
// Selection helper for the double precision BLAS kernel.
// value is non-zero when the element types of T1, T2 and T3 are all double and the
// scalar type T4 is not a complex type.
1584  template< typename T1, typename T2, typename T3, typename T4 >
1585  struct UseDoublePrecisionKernel {
1586  enum { value = IsDouble<typename T1::ElementType>::value &&
1587  IsDouble<typename T2::ElementType>::value &&
1588  IsDouble<typename T3::ElementType>::value &&
1589  !IsComplex<T4>::value };
1590  };
1591  //**********************************************************************************************
1592 
1593  //**********************************************************************************************
1595 
// Selection helper for the single precision complex BLAS kernel.
// value is non-zero when the element types of T1, T2 and T3 are all complex<float>.
1598  template< typename T1, typename T2, typename T3 >
1599  struct UseSinglePrecisionComplexKernel {
1600  typedef complex<float> Type;
1601  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1602  IsSame<typename T2::ElementType,Type>::value &&
1603  IsSame<typename T3::ElementType,Type>::value };
1604  };
1605  //**********************************************************************************************
1606 
1607  //**********************************************************************************************
1609 
// Selection helper for the double precision complex BLAS kernel.
// value is non-zero when the element types of T1, T2 and T3 are all complex<double>.
1612  template< typename T1, typename T2, typename T3 >
1613  struct UseDoublePrecisionComplexKernel {
1614  typedef complex<double> Type;
1615  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1616  IsSame<typename T2::ElementType,Type>::value &&
1617  IsSame<typename T3::ElementType,Type>::value };
1618  };
1619  //**********************************************************************************************
1620 
1621  //**********************************************************************************************
1623 
// Selection helper for the default (non-BLAS) kernel.
// value is non-zero when BLAZE_BLAS_MODE is disabled or when none of the four
// BLAS kernel selection helpers above applies to the given types.
1625  template< typename T1, typename T2, typename T3, typename T4 >
1626  struct UseDefaultKernel {
1627  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1628  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1629  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1630  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1631  };
1632  //**********************************************************************************************
1633 
1634  //**********************************************************************************************
1636 
// Selection helper for the vectorized default kernel.
// value is non-zero when T1, T2 and T3 are all vectorizable, their element types and the
// scalar type T4 are identical, and IntrinsicTrait reports both addition and
// multiplication for that element type.
1639  template< typename T1, typename T2, typename T3, typename T4 >
1640  struct UseVectorizedDefaultKernel {
1641  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1642  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1643  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1644  IsSame<typename T1::ElementType,T4>::value &&
1645  IntrinsicTrait<typename T1::ElementType>::addition &&
1646  IntrinsicTrait<typename T1::ElementType>::multiplication };
1647  };
1648  //**********************************************************************************************
1649 
1650  public:
1651  //**Type definitions****************************************************************************
// Public type interface of the scaled vector/matrix multiplication expression.
1652  typedef DVecScalarMultExpr<VMM,ST,true> This; // Type of this expression specialization
1653  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type: MultTrait of the product result and the scalar
1654  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result
1655  typedef typename ResultType::ElementType ElementType; // Element type of the result
1656  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic (SIMD) type for ElementType
1657  typedef const ElementType ReturnType; // Return type of the subscript operator
1658  typedef const ResultType CompositeType; // Type used when this expression is an operand
1659 
// Type of the left-hand side operand (the wrapped multiplication expression).
1661  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
1662 
// Type of the right-hand side scalar: ElementType if that is numeric, ST otherwise.
1664  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1665 
// Type used for the vector operand inside the kernels: a const VRT temporary if VT is a
// computation, the composite type VCT otherwise.
1667  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
1668 
// Type used for the matrix operand inside the kernels: a const MRT temporary if
// 'evaluate' is set, the composite type MCT otherwise.
1670  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;
1671  //**********************************************************************************************
1672 
1673  //**Compilation flags***************************************************************************
// Compilation flag: fixed to 0, i.e. this expression does not report itself as vectorizable.
1675  enum { vectorizable = 0 };
1676 
// Compilation flag: taken over from CanAlias of the wrapped multiplication expression.
1678  enum { canAlias = CanAlias<VMM>::value };
1679  //**********************************************************************************************
1680 
1681  //**Constructor*********************************************************************************
// Constructor: stores the wrapped multiplication expression and the scalar.
//   vector : left-hand side vector/matrix multiplication expression
//   scalar : right-hand side scalar of the multiplication expression
1687  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1688  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1689  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1690  {}
1691  //**********************************************************************************************
1692 
1693  //**Subscript operator**************************************************************************
// Subscript operator: returns element 'index' of the wrapped product multiplied by the scalar.
//   index : access index, which must be smaller than size() (checked by an internal assert)
1699  inline ReturnType operator[]( size_t index ) const {
1700  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1701  return vector_[index] * scalar_;
1702  }
1703  //**********************************************************************************************
1704 
1705  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped product.
1710  inline size_t size() const {
1711  return vector_.size();
1712  }
1713  //**********************************************************************************************
1714 
1715  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped vector/matrix multiplication expression.
1720  inline LeftOperand leftOperand() const {
1721  return vector_;
1722  }
1723  //**********************************************************************************************
1724 
1725  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
1730  inline RightOperand rightOperand() const {
1731  return scalar_;
1732  }
1733  //**********************************************************************************************
1734 
1735  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//   alias : address to be checked
// The query is forwarded to the wrapped expression, and only if CanAlias<VMM> is set.
1741  template< typename T >
1742  inline bool isAliased( const T* alias ) const {
1743  return CanAlias<VMM>::value && vector_.isAliased( alias );
1744  }
1745  //**********************************************************************************************
1746 
1747  private:
1748  //**Member variables****************************************************************************
1749  LeftOperand vector_; // Left-hand side operand: the wrapped vector/matrix multiplication expression
1750  RightOperand scalar_; // Right-hand side operand: the scalar factor
1751  //**********************************************************************************************
1752 
1753  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled transpose dense vector-dense matrix multiplication to a
// transpose dense vector (lhs = scalar * vec * mat).
//   lhs : target dense vector
//   rhs : scaled multiplication expression to be assigned
// The vector and matrix operands are taken from the wrapped expression, bound to (or
// evaluated into) LT/RT, and the work is dispatched to the default or the BLAS kernel.
1765  template< typename VT1 > // Type of the target dense vector
1766  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
1767  {
1768  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1769 
1770  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1771  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1772 
// A matrix without rows makes every result element an empty sum, so lhs is reset.
// A matrix without columns leaves nothing to assign.
1773  if( right.rows() == 0UL ) {
1774  reset( ~lhs );
1775  return;
1776  }
1777  else if( right.columns() == 0UL ) {
1778  return;
1779  }
1780 
1781  LT x( left ); // Evaluation of the left-hand side dense vector operand
1782  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1783 
1784  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1785  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1786  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1787  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1788 
// The default kernel is used when the matrix is an unevaluated computation or when the
// element count of A is below TDVECDMATMULT_THRESHOLD; otherwise the BLAS kernel is used.
1789  if( ( IsComputation<MT>::value && !evaluate ) ||
1790  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1791  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1792  else
1793  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1794  }
1795  //**********************************************************************************************
1796 
1797  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for the scaled assignment y = scalar * x * A.
// This overload is active when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not hold.
//   y      : target vector, overwritten
//   x      : left-hand side vector operand
//   A      : right-hand side matrix operand
//   scalar : scaling factor applied to the product
// The first row of A is indexed unconditionally; the assign() overload in this class
// returns early for a matrix without rows before it dispatches to a kernel.
1811  template< typename VT1 // Type of the left-hand side target vector
1812  , typename VT2 // Type of the left-hand side vector operand
1813  , typename MT1 // Type of the right-hand side matrix operand
1814  , typename ST2 > // Type of the scalar value
1815  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1816  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1817  {
1818  const size_t M( A.rows() );
1819  const size_t N( A.columns() );
1820 
// jend is N rounded down to an even number; it bounds the two-way unrolled column loop.
1821  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1822  const size_t jend( N & size_t(-2) );
1823 
// Initialize y with the contribution of the first row.
1824  for( size_t j=0UL; j<N; ++j ) {
1825  y[j] = x[0UL] * A(0UL,j);
1826  }
// Accumulate the contributions of the remaining rows.
1827  for( size_t i=1UL; i<M; ++i ) {
1828  for( size_t j=0UL; j<jend; j+=2UL ) {
1829  y[j ] += x[i] * A(i,j );
1830  y[j+1UL] += x[i] * A(i,j+1UL);
1831  }
1832  if( jend < N ) {
1833  y[jend] += x[i] * A(i,jend);
1834  }
1835  }
// Apply the scalar once to the finished product.
1836  for( size_t j=0UL; j<N; ++j ) {
1837  y[j] *= scalar;
1838  }
1839  }
1840  //**********************************************************************************************
1841 
1842  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the scaled assignment y = scalar * x * A.
// This overload is active when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//   y      : target vector, overwritten
//   x      : left-hand side vector operand
//   A      : right-hand side matrix operand
//   scalar : scaling factor applied to the product
// The columns are processed in groups of 8, 4, 3, 2 and finally 1 intrinsic vector(s).
// Each group accumulates x[i] * A(i,.) over all rows i and stores the sum multiplied by
// the broadcast scalar.
// NOTE(review): the accumulators are default-constructed and accumulated into directly;
// this presumably relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the column bound is A.spacing(), not A.columns(); this presumably relies on
// y and the rows of A being padded up to the spacing - confirm against the container types.
1856  template< typename VT1 // Type of the left-hand side target vector
1857  , typename VT2 // Type of the left-hand side vector operand
1858  , typename MT1 // Type of the right-hand side matrix operand
1859  , typename ST2 > // Type of the scalar value
1860  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1861  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1862  {
1863  typedef IntrinsicTrait<ElementType> IT;
1864 
1865  const size_t M( A.rows() );
1866  const size_t N( A.spacing() );
1867 
// Broadcast of the scalar, applied once per stored vector.
1868  const IntrinsicType factor( set( scalar ) );
1869 
1870  size_t j( 0UL );
1871 
// Groups of eight intrinsic vectors.
1872  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1873  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1874  for( size_t i=0UL; i<M; ++i ) {
1875  const IntrinsicType x1( set( x[i] ) );
1876  xmm1 = xmm1 + x1 * A.get(i,j );
1877  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1878  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1879  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1880  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
1881  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
1882  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
1883  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
1884  }
1885  store( &y[j ], xmm1*factor );
1886  store( &y[j+IT::size ], xmm2*factor );
1887  store( &y[j+IT::size*2UL], xmm3*factor );
1888  store( &y[j+IT::size*3UL], xmm4*factor );
1889  store( &y[j+IT::size*4UL], xmm5*factor );
1890  store( &y[j+IT::size*5UL], xmm6*factor );
1891  store( &y[j+IT::size*6UL], xmm7*factor );
1892  store( &y[j+IT::size*7UL], xmm8*factor );
1893  }
// Groups of four intrinsic vectors.
1894  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1895  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1896  for( size_t i=0UL; i<M; ++i ) {
1897  const IntrinsicType x1( set( x[i] ) );
1898  xmm1 = xmm1 + x1 * A.get(i,j );
1899  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1900  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1901  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1902  }
1903  store( &y[j ], xmm1*factor );
1904  store( &y[j+IT::size ], xmm2*factor );
1905  store( &y[j+IT::size*2UL], xmm3*factor );
1906  store( &y[j+IT::size*3UL], xmm4*factor );
1907  }
// Groups of three intrinsic vectors.
1908  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
1909  IntrinsicType xmm1, xmm2, xmm3;
1910  for( size_t i=0UL; i<M; ++i ) {
1911  const IntrinsicType x1( set( x[i] ) );
1912  xmm1 = xmm1 + x1 * A.get(i,j );
1913  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1914  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1915  }
1916  store( &y[j ], xmm1*factor );
1917  store( &y[j+IT::size ], xmm2*factor );
1918  store( &y[j+IT::size*2UL], xmm3*factor );
1919  }
// Groups of two intrinsic vectors.
1920  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1921  IntrinsicType xmm1, xmm2;
1922  for( size_t i=0UL; i<M; ++i ) {
1923  const IntrinsicType x1( set( x[i] ) );
1924  xmm1 = xmm1 + x1 * A.get(i,j );
1925  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
1926  }
1927  store( &y[j ], xmm1*factor );
1928  store( &y[j+IT::size], xmm2*factor );
1929  }
// At most one intrinsic vector is handled here.
// NOTE(review): assumes the remaining width N - j is exactly IT::size - confirm.
1930  if( j < N ) {
1931  IntrinsicType xmm1;
1932  for( size_t i=0UL; i<M; ++i ) {
1933  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
1934  }
1935  store( &y[j], xmm1*factor );
1936  }
1937  }
1938  //**********************************************************************************************
1939 
1940  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS-based scaled assignment y = scalar * x * A.
// This overload is enabled when UseDefaultKernel<VT1,VT2,MT1,ST2> holds and simply forwards
// to selectDefaultAssignKernel().
//   y      : target vector, overwritten
//   x      : left-hand side vector operand
//   A      : right-hand side matrix operand
//   scalar : scaling factor applied to the product
1953  template< typename VT1 // Type of the left-hand side target vector
1954  , typename VT2 // Type of the left-hand side vector operand
1955  , typename MT1 // Type of the right-hand side matrix operand
1956  , typename ST2 > // Type of the scalar value
1957  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1958  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1959  {
1960  selectDefaultAssignKernel( y, x, A, scalar );
1961  }
1962  //**********************************************************************************************
1963 
1964  //**BLAS-based assignment to dense vectors (single precision)***********************************
1965 #if BLAZE_BLAS_MODE
1966 
// Single-precision BLAS kernel for the assignment of the scaled vector-matrix
// product. Enabled through UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>; the three
// constraints below additionally require float element types for y, x and A.
//   y      - target vector, overwritten with the result
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, passed to BLAS as alpha
1979  template< typename VT1 // Type of the left-hand side target vector
1980  , typename VT2 // Type of the left-hand side vector operand
1981  , typename MT1 // Type of the right-hand side matrix operand
1982  , typename ST2 > // Type of the scalar value
1983  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1984  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1985  {
1986  using boost::numeric_cast;
1987 
1988  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1989  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1990  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1991 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
      // The leading dimension is the row spacing of A, not its column count.
1992  const int M ( numeric_cast<int>( A.rows() ) );
1993  const int N ( numeric_cast<int>( A.columns() ) );
1994  const int lda( numeric_cast<int>( A.spacing() ) );
1995 
      // gemv on the transposed row-major matrix: y = scalar * A^T * x + 0 * y,
      // i.e. y is overwritten. Both vectors are accessed with unit stride.
1996  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
1997  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1998  }
1999 #endif
2000  //**********************************************************************************************
2001 
2002  //**BLAS-based assignment to dense vectors (double precision)***********************************
2003 #if BLAZE_BLAS_MODE
2004 
// Double-precision BLAS kernel for the assignment of the scaled vector-matrix
// product. Enabled through UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>; the three
// constraints below additionally require double element types for y, x and A.
//   y      - target vector, overwritten with the result
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, passed to BLAS as alpha
2017  template< typename VT1 // Type of the left-hand side target vector
2018  , typename VT2 // Type of the left-hand side vector operand
2019  , typename MT1 // Type of the right-hand side matrix operand
2020  , typename ST2 > // Type of the scalar value
2021  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2022  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2023  {
2024  using boost::numeric_cast;
2025 
2026  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2027  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2028  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2029 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
      // The leading dimension is the row spacing of A, not its column count.
2030  const int M ( numeric_cast<int>( A.rows() ) );
2031  const int N ( numeric_cast<int>( A.columns() ) );
2032  const int lda( numeric_cast<int>( A.spacing() ) );
2033 
      // gemv on the transposed row-major matrix: y = scalar * A^T * x + 0 * y,
      // i.e. y is overwritten. Both vectors are accessed with unit stride.
2034  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2035  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2036  }
2037 #endif
2038  //**********************************************************************************************
2039 
2040  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2041 #if BLAZE_BLAS_MODE
2042 
// Single-precision complex BLAS kernel for the assignment of the scaled
// vector-matrix product. Enabled through UseSinglePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is float.
//   y      - target vector, overwritten with the result
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, converted to complex<float> and used as alpha
2055  template< typename VT1 // Type of the left-hand side target vector
2056  , typename VT2 // Type of the left-hand side vector operand
2057  , typename MT1 // Type of the right-hand side matrix operand
2058  , typename ST2 > // Type of the scalar value
2059  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2060  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2061  {
2062  using boost::numeric_cast;
2063 
2064  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2065  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2066  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2068  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2069  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2070  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2071 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2072  const int M ( numeric_cast<int>( A.rows() ) );
2073  const int N ( numeric_cast<int>( A.columns() ) );
2074  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
      // beta = 0 makes the call overwrite y.
2075  const complex<float> alpha( scalar );
2076  const complex<float> beta ( 0.0F, 0.0F );
2077 
      // y = alpha * A^T * x + beta * y on the row-major matrix, unit stride for x and y.
2078  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2079  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2080  }
2081 #endif
2082  //**********************************************************************************************
2083 
2084  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2085 #if BLAZE_BLAS_MODE
2086 
// Double-precision complex BLAS kernel for the assignment of the scaled
// vector-matrix product. Enabled through UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is double.
//   y      - target vector, overwritten with the result
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, converted to complex<double> and used as alpha
2099  template< typename VT1 // Type of the left-hand side target vector
2100  , typename VT2 // Type of the left-hand side vector operand
2101  , typename MT1 // Type of the right-hand side matrix operand
2102  , typename ST2 > // Type of the scalar value
2103  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2104  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2105  {
2106  using boost::numeric_cast;
2107 
2108  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2109  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2110  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2112  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2113  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2114  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2115 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2116  const int M ( numeric_cast<int>( A.rows() ) );
2117  const int N ( numeric_cast<int>( A.columns() ) );
2118  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address, hence the named locals.
      // beta = 0 makes the call overwrite y.
2119  const complex<double> alpha( scalar );
2120  const complex<double> beta ( 0.0, 0.0 );
2121 
      // y = alpha * A^T * x + beta * y on the row-major matrix, unit stride for x and y.
2122  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2123  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2124  }
2125 #endif
2126  //**********************************************************************************************
2127 
2128  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector-matrix product to a sparse transpose vector.
// There is no dedicated sparse kernel: the expression is first evaluated into a
// temporary of type ResultType, which is then assigned to the target.
//   lhs - target sparse vector (sizes must already match, see the internal assert)
//   rhs - scaled multiplication expression to be assigned
2140  template< typename VT1 > // Type of the target sparse vector
2141  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2142  {
2145  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2146 
2147  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2148 
      // Evaluate the whole expression once, then hand the result to assign().
2149  const ResultType tmp( rhs );
2150  assign( ~lhs, tmp );
2151  }
2152  //**********************************************************************************************
2153 
2154  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector-matrix product to a dense transpose
// vector (lhs += rhs). Extracts both operands of the nested multiplication
// (rhs.vector_), evaluates them into x and A, and dispatches to either the default
// or the BLAS addition kernel.
//   lhs - target dense vector (sizes must already match, see the internal asserts)
//   rhs - scaled multiplication expression to be added
2166  template< typename VT1 > // Type of the target dense vector
2167  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2168  {
2169  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2170 
2171  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2172  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2173 
      // An empty matrix contributes nothing to the sum, so lhs is left untouched.
2174  if( right.rows() == 0UL || right.columns() == 0UL ) {
2175  return;
2176  }
2177 
2178  LT x( left ); // Evaluation of the left-hand side dense vector operand
2179  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2180 
2181  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2182  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2183  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2184  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2185 
      // Use the default kernel when the matrix operand is a computation that was not
      // evaluated, or when the element count is below TDVECDMATMULT_THRESHOLD;
      // otherwise use the BLAS kernel.
2186  if( ( IsComputation<MT>::value && !evaluate ) ||
2187  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2188  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2189  else
2190  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2191  }
2192  //**********************************************************************************************
2193 
2194  //**Default addition assignment to dense vectors************************************************
// Non-vectorized default kernel for the addition assignment. Active whenever
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does NOT hold (DisableIf). It rebuilds
// the expression x * A * scalar and passes it to y.addAssign().
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2208  template< typename VT1 // Type of the left-hand side target vector
2209  , typename VT2 // Type of the left-hand side vector operand
2210  , typename MT1 // Type of the right-hand side matrix operand
2211  , typename ST2 > // Type of the scalar value
2212  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2213  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2214  {
2215  y.addAssign( x * A * scalar );
2216  }
2217  //**********************************************************************************************
2218 
2219  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment y += scalar * ( x * A ).
// Active when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns are
// processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. For every block the products x[i] * A(i,j..) are accumulated over all
// rows i, scaled by the scalar and added to the values currently stored in y.
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2233  template< typename VT1 // Type of the left-hand side target vector
2234  , typename VT2 // Type of the left-hand side vector operand
2235  , typename MT1 // Type of the right-hand side matrix operand
2236  , typename ST2 > // Type of the scalar value
2237  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2238  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2239  {
2240  typedef IntrinsicTrait<ElementType> IT;
2241 
      // NOTE(review): the column bound is A.spacing(), not A.columns() - presumably the
      // rows of A and the storage of y are padded to a multiple of IT::size; confirm.
2242  const size_t M( A.rows() );
2243  const size_t N( A.spacing() );
2244 
      // The scalar in intrinsic form, applied once per block after accumulation.
2245  const IntrinsicType factor( set( scalar ) );
2246 
2247  size_t j( 0UL );
2248 
      // NOTE(review): the accumulators in all blocks are default-constructed and then
      // read - this assumes IntrinsicType default-initializes to zero; confirm.

      // Blocks of 8 intrinsic vectors.
2249  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2250  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2251  for( size_t i=0UL; i<M; ++i ) {
2252  const IntrinsicType x1( set( x[i] ) );
2253  xmm1 = xmm1 + x1 * A.get(i,j );
2254  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2255  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2256  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2257  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2258  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2259  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2260  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2261  }
2262  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2263  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2264  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2265  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2266  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) + xmm5*factor );
2267  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) + xmm6*factor );
2268  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) + xmm7*factor );
2269  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) + xmm8*factor );
2270  }
      // Blocks of 4 intrinsic vectors.
2271  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2272  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2273  for( size_t i=0UL; i<M; ++i ) {
2274  const IntrinsicType x1( set( x[i] ) );
2275  xmm1 = xmm1 + x1 * A.get(i,j );
2276  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2277  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2278  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2279  }
2280  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2281  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2282  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2283  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2284  }
      // Blocks of 3 intrinsic vectors.
2285  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
2286  IntrinsicType xmm1, xmm2, xmm3;
2287  for( size_t i=0UL; i<M; ++i ) {
2288  const IntrinsicType x1( set( x[i] ) );
2289  xmm1 = xmm1 + x1 * A.get(i,j );
2290  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2291  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2292  }
2293  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2294  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2295  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2296  }
      // Blocks of 2 intrinsic vectors.
2297  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2298  IntrinsicType xmm1, xmm2;
2299  for( size_t i=0UL; i<M; ++i ) {
2300  const IntrinsicType x1( set( x[i] ) );
2301  xmm1 = xmm1 + x1 * A.get(i,j );
2302  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2303  }
2304  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2305  store( &y[j+IT::size], load( &y[j+IT::size] ) + xmm2*factor );
2306  }
      // Remainder: handled as one final intrinsic vector starting at column j.
2307  if( j < N ) {
2308  IntrinsicType xmm1;
2309  for( size_t i=0UL; i<M; ++i ) {
2310  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2311  }
2312  store( &y[j], load( &y[j] ) + xmm1*factor );
2313  }
2314  }
2315  //**********************************************************************************************
2316 
2317  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS addition assignment kernel. It takes part in
// overload resolution only when UseDefaultKernel<VT1,VT2,MT1,ST2> enables it (trait
// defined outside this chunk) and forwards all arguments unchanged to
// selectDefaultAddAssignKernel().
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor of the expression
2331  template< typename VT1 // Type of the left-hand side target vector
2332  , typename VT2 // Type of the left-hand side vector operand
2333  , typename MT1 // Type of the right-hand side matrix operand
2334  , typename ST2 > // Type of the scalar value
2335  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2336  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2337  {
2338  selectDefaultAddAssignKernel( y, x, A, scalar );
2339  }
2340  //**********************************************************************************************
2341 
2342  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2343 #if BLAZE_BLAS_MODE
2344 
// Single-precision BLAS kernel for the addition assignment of the scaled
// vector-matrix product. Enabled through UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>;
// the three constraints below additionally require float element types.
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, passed to BLAS as alpha
2357  template< typename VT1 // Type of the left-hand side target vector
2358  , typename VT2 // Type of the left-hand side vector operand
2359  , typename MT1 // Type of the right-hand side matrix operand
2360  , typename ST2 > // Type of the scalar value
2361  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2362  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2363  {
2364  using boost::numeric_cast;
2365 
2366  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2367  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2368  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2369 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2370  const int M ( numeric_cast<int>( A.rows() ) );
2371  const int N ( numeric_cast<int>( A.columns() ) );
2372  const int lda( numeric_cast<int>( A.spacing() ) );
2373 
      // beta = 1 keeps the current contents of y: y = scalar * A^T * x + y.
2374  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2375  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2376  }
2377 #endif
2378  //**********************************************************************************************
2379 
2380  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2381 #if BLAZE_BLAS_MODE
2382 
// Double-precision BLAS kernel for the addition assignment of the scaled
// vector-matrix product. Enabled through UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>;
// the three constraints below additionally require double element types.
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, passed to BLAS as alpha
2395  template< typename VT1 // Type of the left-hand side target vector
2396  , typename VT2 // Type of the left-hand side vector operand
2397  , typename MT1 // Type of the right-hand side matrix operand
2398  , typename ST2 > // Type of the scalar value
2399  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2400  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2401  {
2402  using boost::numeric_cast;
2403 
2404  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2405  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2406  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2407 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2408  const int M ( numeric_cast<int>( A.rows() ) );
2409  const int N ( numeric_cast<int>( A.columns() ) );
2410  const int lda( numeric_cast<int>( A.spacing() ) );
2411 
      // beta = 1 keeps the current contents of y: y = scalar * A^T * x + y.
2412  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2413  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2414  }
2415 #endif
2416  //**********************************************************************************************
2417 
2418  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2419 #if BLAZE_BLAS_MODE
2420 
// Single-precision complex BLAS kernel for the addition assignment of the scaled
// vector-matrix product. Enabled through UseSinglePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is float.
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, converted to complex<float> and used as alpha
2433  template< typename VT1 // Type of the left-hand side target vector
2434  , typename VT2 // Type of the left-hand side vector operand
2435  , typename MT1 // Type of the right-hand side matrix operand
2436  , typename ST2 > // Type of the scalar value
2437  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2438  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2439  {
2440  using boost::numeric_cast;
2441 
2442  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2443  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2444  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2446  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2447  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2448  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2449 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2450  const int M ( numeric_cast<int>( A.rows() ) );
2451  const int N ( numeric_cast<int>( A.columns() ) );
2452  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha and beta are passed by address; beta = (1,0) keeps the current contents of y.
2453  const complex<float> alpha( scalar );
2454  const complex<float> beta ( 1.0F, 0.0F );
2455 
      // y = alpha * A^T * x + y on the row-major matrix, unit stride for x and y.
2456  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2457  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2458  }
2459 #endif
2460  //**********************************************************************************************
2461 
2462  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2463 #if BLAZE_BLAS_MODE
2464 
// Double-precision complex BLAS kernel for the addition assignment of the scaled
// vector-matrix product. Enabled through UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is double.
//   y      - target vector the result is added to
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor, converted to complex<double> and used as alpha
2477  template< typename VT1 // Type of the left-hand side target vector
2478  , typename VT2 // Type of the left-hand side vector operand
2479  , typename MT1 // Type of the right-hand side matrix operand
2480  , typename ST2 > // Type of the scalar value
2481  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2482  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2483  {
2484  using boost::numeric_cast;
2485 
2486  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2487  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2488  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2490  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2491  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2492  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2493 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2494  const int M ( numeric_cast<int>( A.rows() ) );
2495  const int N ( numeric_cast<int>( A.columns() ) );
2496  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha and beta are passed by address; beta = (1,0) keeps the current contents of y.
2497  const complex<double> alpha( scalar );
2498  const complex<double> beta ( 1.0, 0.0 );
2499 
      // y = alpha * A^T * x + y on the row-major matrix, unit stride for x and y.
2500  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2501  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2502  }
2503 #endif
2504  //**********************************************************************************************
2505 
2506  //**Addition assignment to sparse vectors*******************************************************
2507  // No special implementation for the addition assignment to sparse vectors.
2508  //**********************************************************************************************
2509 
2510  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled vector-matrix product to a dense transpose
// vector (lhs -= rhs). Extracts both operands of the nested multiplication
// (rhs.vector_), evaluates them into x and A, and dispatches to either the default
// or the BLAS subtraction kernel.
//   lhs - target dense vector (sizes must already match, see the internal asserts)
//   rhs - scaled multiplication expression to be subtracted
2522  template< typename VT1 > // Type of the target dense vector
2523  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2524  {
2525  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2526 
2527  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2528  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2529 
      // An empty matrix contributes nothing to the difference, so lhs is left untouched.
2530  if( right.rows() == 0UL || right.columns() == 0UL ) {
2531  return;
2532  }
2533 
2534  LT x( left ); // Evaluation of the left-hand side dense vector operand
2535  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2536 
2537  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2538  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2539  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2540  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2541 
      // Use the default kernel when the matrix operand is a computation that was not
      // evaluated, or when the element count is below TDVECDMATMULT_THRESHOLD;
      // otherwise use the BLAS kernel.
2542  if( ( IsComputation<MT>::value && !evaluate ) ||
2543  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2544  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2545  else
2546  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2547  }
2548  //**********************************************************************************************
2549 
2550  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel for the subtraction assignment. Active whenever
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does NOT hold (DisableIf). It rebuilds
// the expression x * A * scalar and passes it to y.subAssign().
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2564  template< typename VT1 // Type of the left-hand side target vector
2565  , typename VT2 // Type of the left-hand side vector operand
2566  , typename MT1 // Type of the right-hand side matrix operand
2567  , typename ST2 > // Type of the scalar value
2568  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2569  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2570  {
2571  y.subAssign( x * A * scalar );
2572  }
2573  //**********************************************************************************************
2574 
2575  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment y -= scalar * ( x * A ).
// Active when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns are
// processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. For every block the products x[i] * A(i,j..) are accumulated over all
// rows i, scaled by the scalar and subtracted from the values currently stored in y.
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2589  template< typename VT1 // Type of the left-hand side target vector
2590  , typename VT2 // Type of the left-hand side vector operand
2591  , typename MT1 // Type of the right-hand side matrix operand
2592  , typename ST2 > // Type of the scalar value
2593  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2594  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2595  {
2596  typedef IntrinsicTrait<ElementType> IT;
2597 
      // NOTE(review): the column bound is A.spacing(), not A.columns() - presumably the
      // rows of A and the storage of y are padded to a multiple of IT::size; confirm.
2598  const size_t M( A.rows() );
2599  const size_t N( A.spacing() );
2600 
      // The scalar in intrinsic form, applied once per block after accumulation.
2601  const IntrinsicType factor( set( scalar ) );
2602 
2603  size_t j( 0UL );
2604 
      // NOTE(review): the accumulators in all blocks are default-constructed and then
      // read - this assumes IntrinsicType default-initializes to zero; confirm.

      // Blocks of 8 intrinsic vectors.
2605  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2606  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2607  for( size_t i=0UL; i<M; ++i ) {
2608  const IntrinsicType x1( set( x[i] ) );
2609  xmm1 = xmm1 + x1 * A.get(i,j );
2610  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2611  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2612  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2613  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2614  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2615  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2616  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2617  }
2618  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2619  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2620  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2621  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2622  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) - xmm5*factor );
2623  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) - xmm6*factor );
2624  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) - xmm7*factor );
2625  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) - xmm8*factor );
2626  }
      // Blocks of 4 intrinsic vectors.
2627  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2628  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2629  for( size_t i=0UL; i<M; ++i ) {
2630  const IntrinsicType x1( set( x[i] ) );
2631  xmm1 = xmm1 + x1 * A.get(i,j );
2632  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2633  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2634  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2635  }
2636  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2637  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2638  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2639  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2640  }
      // Blocks of 3 intrinsic vectors.
2641  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
2642  IntrinsicType xmm1, xmm2, xmm3;
2643  for( size_t i=0UL; i<M; ++i ) {
2644  const IntrinsicType x1( set( x[i] ) );
2645  xmm1 = xmm1 + x1 * A.get(i,j );
2646  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2647  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2648  }
2649  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2650  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2651  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2652  }
      // Blocks of 2 intrinsic vectors.
2653  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2654  IntrinsicType xmm1, xmm2;
2655  for( size_t i=0UL; i<M; ++i ) {
2656  const IntrinsicType x1( set( x[i] ) );
2657  xmm1 = xmm1 + x1 * A.get(i,j );
2658  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2659  }
2660  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2661  store( &y[j+IT::size], load( &y[j+IT::size] ) - xmm2*factor );
2662  }
      // Remainder: handled as one final intrinsic vector starting at column j.
2663  if( j < N ) {
2664  IntrinsicType xmm1;
2665  for( size_t i=0UL; i<M; ++i ) {
2666  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2667  }
2668  store( &y[j], load( &y[j] ) - xmm1*factor );
2669  }
2670  }
2671  //**********************************************************************************************
2672 
2673  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback overload of the BLAS subtraction assignment kernel. It takes part in
// overload resolution only when UseDefaultKernel<VT1,VT2,MT1,ST2> enables it (trait
// defined outside this chunk) and forwards all arguments unchanged to
// selectDefaultSubAssignKernel().
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor of the expression
2687  template< typename VT1 // Type of the left-hand side target vector
2688  , typename VT2 // Type of the left-hand side vector operand
2689  , typename MT1 // Type of the right-hand side matrix operand
2690  , typename ST2 > // Type of the scalar value
2691  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2692  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2693  {
2694  selectDefaultSubAssignKernel( y, x, A, scalar );
2695  }
2696  //**********************************************************************************************
2697 
2698  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2699 #if BLAZE_BLAS_MODE
2700 
// Single-precision BLAS kernel for the subtraction assignment of the scaled
// vector-matrix product. Enabled through UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>;
// the three constraints below additionally require float element types.
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor; its negation is passed to BLAS as alpha
2713  template< typename VT1 // Type of the left-hand side target vector
2714  , typename VT2 // Type of the left-hand side vector operand
2715  , typename MT1 // Type of the right-hand side matrix operand
2716  , typename ST2 > // Type of the scalar value
2717  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2718  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2719  {
2720  using boost::numeric_cast;
2721 
2722  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2723  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2724  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2725 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2726  const int M ( numeric_cast<int>( A.rows() ) );
2727  const int N ( numeric_cast<int>( A.columns() ) );
2728  const int lda( numeric_cast<int>( A.spacing() ) );
2729 
      // Subtraction is expressed as an accumulation with negated alpha and beta = 1:
      // y = (-scalar) * A^T * x + y.
2730  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2731  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2732  }
2733 #endif
2734  //**********************************************************************************************
2735 
2736  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2737 #if BLAZE_BLAS_MODE
2738 
// Double-precision BLAS kernel for the subtraction assignment of the scaled
// vector-matrix product. Enabled through UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>;
// the three constraints below additionally require double element types.
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor; its negation is passed to BLAS as alpha
2751  template< typename VT1 // Type of the left-hand side target vector
2752  , typename VT2 // Type of the left-hand side vector operand
2753  , typename MT1 // Type of the right-hand side matrix operand
2754  , typename ST2 > // Type of the scalar value
2755  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2756  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2757  {
2758  using boost::numeric_cast;
2759 
2760  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2762  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2763 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2764  const int M ( numeric_cast<int>( A.rows() ) );
2765  const int N ( numeric_cast<int>( A.columns() ) );
2766  const int lda( numeric_cast<int>( A.spacing() ) );
2767 
      // Subtraction is expressed as an accumulation with negated alpha and beta = 1:
      // y = (-scalar) * A^T * x + y.
2768  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2769  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2770  }
2771 #endif
2772  //**********************************************************************************************
2773 
2774  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2775 #if BLAZE_BLAS_MODE
2776 
// Single-precision complex BLAS kernel for the subtraction assignment of the scaled
// vector-matrix product. Enabled through UseSinglePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is float.
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor; its negation is converted to complex<float> and used as alpha
2789  template< typename VT1 // Type of the left-hand side target vector
2790  , typename VT2 // Type of the left-hand side vector operand
2791  , typename MT1 // Type of the right-hand side matrix operand
2792  , typename ST2 > // Type of the scalar value
2793  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2794  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2795  {
2796  using boost::numeric_cast;
2797 
2798  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2800  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2802  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2803  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2804  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2805 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2806  const int M ( numeric_cast<int>( A.rows() ) );
2807  const int N ( numeric_cast<int>( A.columns() ) );
2808  const int lda( numeric_cast<int>( A.spacing() ) );
      // Negated alpha with beta = (1,0) turns the gemv accumulation into a subtraction.
2809  const complex<float> alpha( -scalar );
2810  const complex<float> beta ( 1.0F, 0.0F );
2811 
      // y = alpha * A^T * x + y on the row-major matrix, unit stride for x and y.
2812  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2813  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2814  }
2815 #endif
2816  //**********************************************************************************************
2817 
2818  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2819 #if BLAZE_BLAS_MODE
2820 
// Double-precision complex BLAS kernel for the subtraction assignment of the scaled
// vector-matrix product. Enabled through UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
// (the scalar type is not part of the selection trait). The constraints below require
// complex element types whose value_type is double.
//   y      - target vector the result is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor; its negation is converted to complex<double> and used as alpha
2833  template< typename VT1 // Type of the left-hand side target vector
2834  , typename VT2 // Type of the left-hand side vector operand
2835  , typename MT1 // Type of the right-hand side matrix operand
2836  , typename ST2 > // Type of the scalar value
2837  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2838  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2839  {
2840  using boost::numeric_cast;
2841 
2842  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2843  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2844  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2846  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2847  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2848  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2849 
      // CBLAS expects int dimensions; numeric_cast converts with a range check.
2850  const int M ( numeric_cast<int>( A.rows() ) );
2851  const int N ( numeric_cast<int>( A.columns() ) );
2852  const int lda( numeric_cast<int>( A.spacing() ) );
      // Negated alpha with beta = (1,0) turns the gemv accumulation into a subtraction.
2853  const complex<double> alpha( -scalar );
2854  const complex<double> beta ( 1.0, 0.0 );
2855 
      // y = alpha * A^T * x + y on the row-major matrix, unit stride for x and y.
2856  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2857  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2858  }
2859 #endif
2860  //**********************************************************************************************
2861 
2862  //**Subtraction assignment to sparse vectors****************************************************
2863  // No special implementation for the subtraction assignment to sparse vectors.
2864  //**********************************************************************************************
2865 
2866  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled vector-matrix product to a dense transpose
// vector. There is no dedicated kernel: the expression is first evaluated into a
// temporary of type ResultType, which is then passed to multAssign() together with
// the target.
//   lhs - target dense vector (sizes must already match, see the internal assert)
//   rhs - scaled multiplication expression used as the right-hand side operand
2878  template< typename VT1 > // Type of the target dense vector
2879  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2880  {
2883  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2884 
2885  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2886 
      // Evaluate the whole expression once, then hand the result to multAssign().
2887  const ResultType tmp( rhs );
2888  multAssign( ~lhs, tmp );
2889  }
2890  //**********************************************************************************************
2891 
2892  //**Multiplication assignment to sparse vectors*************************************************
2893  // No special implementation for the multiplication assignment to sparse vectors.
2894  //**********************************************************************************************
2895 
2896  //**Compile time checks*************************************************************************
2904  //**********************************************************************************************
2905 };
2907 //*************************************************************************************************
2908 
2909 
2910 
2911 
2912 //=================================================================================================
2913 //
2914 // GLOBAL BINARY ARITHMETIC OPERATORS
2915 //
2916 //=================================================================================================
2917 
2918 //*************************************************************************************************
// Creates the expression object for a dense vector / dense matrix multiplication.
//
// NOTE(review): the declarator line (listing line 2952, carrying the function name and the
// parameters `vec` and `mat`) is absent from this listing. Judging by the section heading and
// the `vec * ...` usage in the restructuring operator of this file, this is presumably the
// multiplication operator taking the vector first and the matrix second -- confirm against
// the original header.
//
// Returns a TDVecDMatMultExpr<T1,T2> constructed from ( ~vec, ~mat ).
// Throws std::invalid_argument if the size of vec differs from the number of rows of mat.
// The overload is disabled (DisableIf) when IsMatMatMultExpr<T2> holds.
2949 template< typename T1 // Type of the left-hand side dense vector
2950  , typename T2 > // Type of the right-hand side dense matrix
2951 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
2953 {
// The vector is the left-hand operand, so its size must equal the row count of the matrix.
2954  if( (~vec).size() != (~mat).rows() )
2955  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2956
2957  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
2958 }
2959 //*************************************************************************************************
2960 
2961 
2962 
2963 
2964 //=================================================================================================
2965 //
2966 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
2967 //
2968 //=================================================================================================
2969 
2970 //*************************************************************************************************
// Restructuring overload for multiplying a dense vector with a matrix/matrix multiplication
// expression.
//
// NOTE(review): the declarator line (listing line 2987, carrying the function name and the
// parameters `vec` and `mat`) is absent from this listing; presumably this is the
// multiplication operator taking the vector first and a matrix of storage order SO second --
// confirm against the original header.
//
// Enabled (EnableIf) only when IsMatMatMultExpr<T2> holds. Instead of multiplying the vector
// with the matrix product, the expression is re-associated: the vector is multiplied with the
// left operand of mat first, and that result is multiplied with the right operand of mat.
// The return type is taken from MultExprTrait<T1,T2>.
2983 template< typename T1 // Type of the left-hand side dense vector
2984  , typename T2 // Type of the right-hand side dense matrix
2985  , bool SO > // Storage order of the right-hand side dense matrix
2986 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
2988 {
// vec * ( A * B )  ->  ( vec * A ) * B
2989  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
2990 }
2991 //*************************************************************************************************
2992 
2993 } // namespace blaze
2994 
2995 #endif