All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
49 #include <blaze/system/BLAS.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/Complex.h>
57 #include <blaze/util/DisableIf.h>
58 #include <blaze/util/EnableIf.h>
60 #include <blaze/util/SelectType.h>
61 #include <blaze/util/Types.h>
67 
68 
69 namespace blaze {
70 
71 //=================================================================================================
72 //
73 // CLASS TDVECDMATMULTEXPR
74 //
75 //=================================================================================================
76 
77 //*************************************************************************************************
84 template< typename VT // Type of the left-hand side dense vector
85  , typename MT > // Type of the right-hand side dense matrix
86 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
87  , private Expression
88  , private Computation
89 {
90  private:
91  //**Type definitions****************************************************************************
// Private shorthand aliases for the nested types of the two operand types VT and MT.
92  typedef typename VT::ResultType VRT; // Nested ResultType of the vector operand type
93  typedef typename MT::ResultType MRT; // Nested ResultType of the matrix operand type
94  typedef typename VRT::ElementType VET; // ElementType of VRT
95  typedef typename MRT::ElementType MET; // ElementType of MRT
96  typedef typename VT::CompositeType VCT; // Nested CompositeType of the vector operand type
97  typedef typename MT::CompositeType MCT; // Nested CompositeType of the matrix operand type
98  //**********************************************************************************************
99 
100  //**********************************************************************************************
// Compile-time flag 'evaluate'. It is read by assign() and addAssign(), where
// ( IsComputation<MT>::value && !evaluate ) forces the default (non-BLAS) kernel.
// NOTE(review): in this listing the expression ends on a dangling '&&' and the numbering jumps
// from 102 to 104, so the rest of the condition and the closing '};' are missing here.
// Restore the missing line from the upstream header before compiling.
102  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
104  //**********************************************************************************************
105 
106  //**********************************************************************************************
108 
109 
// Selector trait for the single precision BLAS kernels: 'value' is nonzero only if T1, T2 and
// T3 are all vectorizable and each of their element types is float.
// NOTE(review): listing lines 115-117 were missing, which left the enum unterminated. They are
// reconstructed here by analogy with UseDoublePrecisionKernel and with the
// BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE checks in the single precision kernels. This assumes an
// IsFloat trait is available in the same way as IsDouble -- confirm against upstream.
112  template< typename T1, typename T2, typename T3 >
113  struct UseSinglePrecisionKernel {
114  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
115  IsFloat<typename T1::ElementType>::value &&
116  IsFloat<typename T2::ElementType>::value &&
117  IsFloat<typename T3::ElementType>::value };
118  };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124 
125 
// Selector trait for the double precision BLAS kernels: 'value' is nonzero only if T1, T2 and
// T3 are all vectorizable and IsDouble holds for each of their element types.
128  template< typename T1, typename T2, typename T3 >
129  struct UseDoublePrecisionKernel {
130  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
131  IsDouble<typename T1::ElementType>::value &&
132  IsDouble<typename T2::ElementType>::value &&
133  IsDouble<typename T3::ElementType>::value };
134  };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
141 
// Selector trait for the single precision complex BLAS kernels: 'value' is nonzero only if T1,
// T2 and T3 are all vectorizable and each element type is exactly complex<float>.
144  template< typename T1, typename T2, typename T3 >
145  struct UseSinglePrecisionComplexKernel {
146  typedef complex<float> Type; // Element type all three operands must share
147  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
148  IsSame<typename T1::ElementType,Type>::value &&
149  IsSame<typename T2::ElementType,Type>::value &&
150  IsSame<typename T3::ElementType,Type>::value };
151  };
153  //**********************************************************************************************
154 
155  //**********************************************************************************************
157 
158 
// Selector trait for the double precision complex BLAS kernels: 'value' is nonzero only if T1,
// T2 and T3 are all vectorizable and each element type is exactly complex<double>.
161  template< typename T1, typename T2, typename T3 >
162  struct UseDoublePrecisionComplexKernel {
163  typedef complex<double> Type; // Element type all three operands must share
164  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
165  IsSame<typename T1::ElementType,Type>::value &&
166  IsSame<typename T2::ElementType,Type>::value &&
167  IsSame<typename T3::ElementType,Type>::value };
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
175 
// Selector trait for the fallback overloads of selectBlasAssignKernel/selectBlasAddAssignKernel:
// 'value' is nonzero if BLAZE_BLAS_MODE is 0, or if none of the four BLAS selector traits
// (single, double, single complex, double complex) applies to T1, T2 and T3.
177  template< typename T1, typename T2, typename T3 >
178  struct UseDefaultKernel {
179  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
180  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
181  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
182  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
183  };
185  //**********************************************************************************************
186 
187  //**********************************************************************************************
189 
190 
// Selector trait choosing between the scalar and the vectorized default kernels: 'value' is
// nonzero only if T1, T2 and T3 are all vectorizable, share one element type, and
// IntrinsicTrait reports both 'addition' and 'multiplication' for that element type.
193  template< typename T1, typename T2, typename T3 >
194  struct UseVectorizedDefaultKernel {
195  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
196  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
197  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
198  IntrinsicTrait<typename T1::ElementType>::addition &&
199  IntrinsicTrait<typename T1::ElementType>::multiplication };
200  };
202  //**********************************************************************************************
203 
204  public:
205  //**Type definitions****************************************************************************
// Public nested types of the expression.
// NOTE(review): ResultType is used below, but its own typedef is not visible in this listing
// (the numbering jumps from 205 to 208).
208  typedef typename ResultType::TransposeType TransposeType; // TransposeType of ResultType
209  typedef typename ResultType::ElementType ElementType; // ElementType of ResultType
211  typedef const ElementType ReturnType; // Type returned by operator[] (const value)
212  typedef const ResultType CompositeType; // A const ResultType
213 
// Type of the stored left operand. Presumably SelectType yields its second argument when the
// condition is true and its third otherwise, i.e. expressions by value, everything else by
// const reference -- confirm against SelectType.
215  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
216 
// Type of the stored right operand; same selection scheme as LeftOperand, applied to MT.
218  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
219 
// Type of the local 'x' that assign()/addAssign() construct from the vector operand; the choice
// between const VRT and VCT is driven by IsComputation<VT>.
221  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
222 
225  //**********************************************************************************************
226 
227  //**Compilation flags***************************************************************************
// Compilation flag: this expression type itself reports 'vectorizable' as 0.
229  enum { vectorizable = 0 };
230  //**********************************************************************************************
231 
232  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//   vec: left-hand side dense vector operand
//   mat: right-hand side dense matrix operand
// end_ is precomputed as ((rows-1) with its lowest bit cleared) + 1. It is the exclusive upper
// bound of the two-at-a-time loop in operator[], which starts at index 1. For a matrix without
// rows the value is not meaningful; operator[] checks rows() != 0 before it uses end_.
238  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
239  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
240  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
241  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
242  {
     // The size precondition is only checked through the internal assertion macro.
243  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
244  }
245  //**********************************************************************************************
246 
247  //**Subscript operator**************************************************************************
// Computes a single element of the product: the sum over j of vec_[j] * mat_(j,index).
//   index: column of the matrix, which is the element of the result vector to compute
//   returns the computed element by value
253  inline ReturnType operator[]( size_t index ) const {
254  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
255 
256  ElementType res;
257 
258  if( mat_.rows() != 0UL ) {
     // The first term initializes the accumulator, so res needs no prior reset.
259  res = vec_[0UL] * mat_(0UL,index);
     // The remaining terms are added two per iteration, starting at row 1.
260  for( size_t j=1UL; j<end_; j+=2UL ) {
261  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
262  }
     // One row is left over when the pairwise loop stops before the last row.
263  if( end_ < mat_.rows() ) {
264  res += vec_[end_] * mat_(end_,index);
265  }
266  }
267  else {
     // There are no rows to sum over: hand back a reset element.
268  reset( res );
269  }
270 
271  return res;
272  }
273  //**********************************************************************************************
274 
275  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of columns of the matrix operand.
280  inline size_t size() const {
281  return mat_.columns();
282  }
283  //**********************************************************************************************
284 
285  //**Left operand access*************************************************************************
// Returns the stored left-hand side dense vector operand.
290  inline LeftOperand leftOperand() const {
291  return vec_;
292  }
293  //**********************************************************************************************
294 
295  //**Right operand access************************************************************************
// Returns the stored right-hand side dense matrix operand.
300  inline RightOperand rightOperand() const {
301  return mat_;
302  }
303  //**********************************************************************************************
304 
305  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//   alias: the address to check
//   returns true if either operand reports isAliased( alias )
// The body is identical to that of isAliased().
311  template< typename T >
312  inline bool canAlias( const T* alias ) const {
313  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
314  }
315  //**********************************************************************************************
316 
317  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//   alias: the address to check
//   returns true if either operand reports isAliased( alias )
323  template< typename T >
324  inline bool isAliased( const T* alias ) const {
325  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
326  }
327  //**********************************************************************************************
328 
329  private:
330  //**Member variables****************************************************************************
// End of the unrolled calculation loop in operator[]; set once in the constructor.
// NOTE(review): the declarations of vec_ and mat_ are not visible in this listing (the
// numbering jumps from 330 to 333).
333  const size_t end_;
334  //**********************************************************************************************
335 
336  //**Assignment to dense vectors*****************************************************************
// Assigns the vector/matrix product to a dense vector.
//   lhs: target dense vector; its size must equal rhs.size() (internal assertion only)
//   rhs: the multiplication expression to evaluate
// NOTE(review): the RT typedef and TDVECDMATMULT_THRESHOLD are not defined in this listing.
349  template< typename VT1 > // Type of the target dense vector
350  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
351  {
353 
354  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
355 
     // A matrix without rows gives an empty sum for every element: reset the target.
356  if( rhs.mat_.rows() == 0UL ) {
357  reset( ~lhs );
358  return;
359  }
     // A matrix without columns gives an empty result: nothing to write.
360  else if( rhs.mat_.columns() == 0UL ) {
361  return;
362  }
363 
364  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
365  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
366 
367  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
368  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
369  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
370  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
371 
     // The default kernel is used for a computation operand with 'evaluate' unset, or for
     // problems with fewer than TDVECDMATMULT_THRESHOLD matrix elements; otherwise the BLAS
     // kernel selection runs.
372  if( ( IsComputation<MT>::value && !evaluate ) ||
373  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
374  TDVecDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
375  else
376  TDVecDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
377  }
379  //**********************************************************************************************
380 
381  //**Default assignment to dense vectors*********************************************************
// Scalar default kernel for y = x * A; selected when UseVectorizedDefaultKernel is false.
//   y: target vector (overwritten)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
// The kernel reads A(0UL,j) unconditionally. The visible caller assign() returns early for
// matrices with zero rows or zero columns before it reaches a kernel.
395  template< typename VT1 // Type of the left-hand side target vector
396  , typename VT2 // Type of the left-hand side vector operand
397  , typename MT1 > // Type of the right-hand side matrix operand
398  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
399  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
400  {
401  const size_t M( A.rows() );
402  const size_t N( A.columns() );
403 
404  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
     // jend is N rounded down to an even number: the bound of the two-column unrolled loop.
405  const size_t jend( N & size_t(-2) );
406 
     // Row 0 initializes every element of y by plain assignment.
407  for( size_t j=0UL; j<N; ++j ) {
408  y[j] = x[0UL] * A(0UL,j);
409  }
     // Rows 1..M-1 are accumulated on top, two columns per inner iteration.
410  for( size_t i=1UL; i<M; ++i ) {
411  for( size_t j=0UL; j<jend; j+=2UL ) {
412  y[j ] += x[i] * A(i,j );
413  y[j+1UL] += x[i] * A(i,j+1UL);
414  }
     // Odd N: handle the last column separately.
415  if( jend < N ) {
416  y[jend] += x[i] * A(i,jend);
417  }
418  }
419  }
421  //**********************************************************************************************
422 
423  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for y = x * A; selected when UseVectorizedDefaultKernel is true.
//   y: target vector (overwritten through store())
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
// Columns are processed in blocks of 8, 4, 3, 2 and finally 1 step(s) of IT::size. For every
// block the accumulators are summed over all rows of A and then stored into y.
// NOTE(review): N is A.spacing(), not A.columns(), so y is written up to the spacing --
// presumably both operands are padded to a multiple of IT::size; confirm.
// NOTE(review): the accumulators are default-constructed and then added to, which assumes the
// IntrinsicType default constructor yields zero. IntrinsicType itself is not declared in this
// listing -- confirm.
437  template< typename VT1 // Type of the left-hand side target vector
438  , typename VT2 // Type of the left-hand side vector operand
439  , typename MT1 > // Type of the right-hand side matrix operand
440  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
441  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
442  {
443  typedef IntrinsicTrait<ElementType> IT;
444 
445  const size_t M( A.rows() );
446  const size_t N( A.spacing() );
447 
     // j is shared by all loops below: each tier continues where the previous one stopped.
448  size_t j( 0UL );
449 
     // Blocks of 8 * IT::size columns.
450  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
451  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
452  for( size_t i=0UL; i<M; ++i ) {
453  const IntrinsicType x1( set( x[i] ) );
454  xmm1 = xmm1 + x1 * A.get(i,j );
455  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
456  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
457  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
458  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
459  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
460  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
461  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
462  }
463  store( &y[j ], xmm1 );
464  store( &y[j+IT::size ], xmm2 );
465  store( &y[j+IT::size*2UL], xmm3 );
466  store( &y[j+IT::size*3UL], xmm4 );
467  store( &y[j+IT::size*4UL], xmm5 );
468  store( &y[j+IT::size*5UL], xmm6 );
469  store( &y[j+IT::size*6UL], xmm7 );
470  store( &y[j+IT::size*7UL], xmm8 );
471  }
     // Blocks of 4 * IT::size columns.
472  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
473  IntrinsicType xmm1, xmm2, xmm3, xmm4;
474  for( size_t i=0UL; i<M; ++i ) {
475  const IntrinsicType x1( set( x[i] ) );
476  xmm1 = xmm1 + x1 * A.get(i,j );
477  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
478  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
479  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
480  }
481  store( &y[j ], xmm1 );
482  store( &y[j+IT::size ], xmm2 );
483  store( &y[j+IT::size*2UL], xmm3 );
484  store( &y[j+IT::size*3UL], xmm4 );
485  }
     // Blocks of 3 * IT::size columns.
486  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
487  IntrinsicType xmm1, xmm2, xmm3;
488  for( size_t i=0UL; i<M; ++i ) {
489  const IntrinsicType x1( set( x[i] ) );
490  xmm1 = xmm1 + x1 * A.get(i,j );
491  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
492  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
493  }
494  store( &y[j ], xmm1 );
495  store( &y[j+IT::size ], xmm2 );
496  store( &y[j+IT::size*2UL], xmm3 );
497  }
     // Blocks of 2 * IT::size columns.
498  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
499  IntrinsicType xmm1, xmm2;
500  for( size_t i=0UL; i<M; ++i ) {
501  const IntrinsicType x1( set( x[i] ) );
502  xmm1 = xmm1 + x1 * A.get(i,j );
503  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
504  }
505  store( &y[j ], xmm1 );
506  store( &y[j+IT::size], xmm2 );
507  }
     // At most one final step remains; it is handled as a single accumulator.
508  if( j < N ) {
509  IntrinsicType xmm1;
510  for( size_t i=0UL; i<M; ++i ) {
511  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
512  }
513  store( &y[j], xmm1 );
514  }
515  }
517  //**********************************************************************************************
518 
519  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the BLAS kernel selection: enabled when UseDefaultKernel is true, it
// simply forwards to selectDefaultAssignKernel( y, x, A ).
533  template< typename VT1 // Type of the left-hand side target vector
534  , typename VT2 // Type of the left-hand side vector operand
535  , typename MT1 > // Type of the right-hand side matrix operand
536  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
537  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
538  {
539  selectDefaultAssignKernel( y, x, A );
540  }
542  //**********************************************************************************************
543 
544  //**BLAS-based assignment to dense vectors (single precision)***********************************
545 #if BLAZE_BLAS_MODE
546 
// Single precision BLAS kernel for y = x * A; enabled when UseSinglePrecisionKernel is true
// and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (overwritten)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
559  template< typename VT1 // Type of the left-hand side target vector
560  , typename VT2 // Type of the left-hand side vector operand
561  , typename MT1 > // Type of the right-hand side matrix operand
562  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
563  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
564  {
565  using boost::numeric_cast;
566 
567  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
568  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
570 
     // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
571  const int M ( numeric_cast<int>( A.rows() ) );
572  const int N ( numeric_cast<int>( A.columns() ) );
     // The leading dimension passed to BLAS is the matrix spacing, not the column count.
573  const int lda( numeric_cast<int>( A.spacing() ) );
574 
     // Row-major, transposed gemv with alpha = 1 and beta = 0: y is overwritten with trans(A) * x.
575  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
576  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
577  }
579 #endif
580  //**********************************************************************************************
581 
582  //**BLAS-based assignment to dense vectors (double precision)***********************************
583 #if BLAZE_BLAS_MODE
584 
// Double precision BLAS kernel for y = x * A; enabled when UseDoublePrecisionKernel is true
// and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (overwritten)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
597  template< typename VT1 // Type of the left-hand side target vector
598  , typename VT2 // Type of the left-hand side vector operand
599  , typename MT1 > // Type of the right-hand side matrix operand
600  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
601  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
602  {
603  using boost::numeric_cast;
604 
605  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
606  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
607  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
608 
     // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
609  const int M ( numeric_cast<int>( A.rows() ) );
610  const int N ( numeric_cast<int>( A.columns() ) );
     // The leading dimension passed to BLAS is the matrix spacing, not the column count.
611  const int lda( numeric_cast<int>( A.spacing() ) );
612 
     // Row-major, transposed gemv with alpha = 1 and beta = 0: y is overwritten with trans(A) * x.
613  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
614  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
615  }
617 #endif
618  //**********************************************************************************************
619 
620  //**BLAS-based assignment to dense vectors (single precision complex)***************************
621 #if BLAZE_BLAS_MODE
622 
// Single precision complex BLAS kernel for y = x * A; enabled when
// UseSinglePrecisionComplexKernel is true and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (overwritten)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
635  template< typename VT1 // Type of the left-hand side target vector
636  , typename VT2 // Type of the left-hand side vector operand
637  , typename MT1 > // Type of the right-hand side matrix operand
638  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
639  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
640  {
641  using boost::numeric_cast;
642 
643  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
644  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
645  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
646  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
647  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
648  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
649 
     // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
650  const int M ( numeric_cast<int>( A.rows() ) );
651  const int N ( numeric_cast<int>( A.columns() ) );
     // The leading dimension passed to BLAS is the matrix spacing, not the column count.
652  const int lda( numeric_cast<int>( A.spacing() ) );
     // The complex gemv routines take alpha and beta by address, so named constants are needed.
653  const complex<float> alpha( 1.0F, 0.0F );
654  const complex<float> beta ( 0.0F, 0.0F );
655 
     // Row-major, transposed (not conjugated) gemv with alpha = 1 and beta = 0.
656  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
657  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
658  }
660 #endif
661  //**********************************************************************************************
662 
663  //**BLAS-based assignment to dense vectors (double precision complex)***************************
664 #if BLAZE_BLAS_MODE
665 
// Double precision complex BLAS kernel for y = x * A; enabled when
// UseDoublePrecisionComplexKernel is true and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (overwritten)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
678  template< typename VT1 // Type of the left-hand side target vector
679  , typename VT2 // Type of the left-hand side vector operand
680  , typename MT1 > // Type of the right-hand side matrix operand
681  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
682  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
683  {
684  using boost::numeric_cast;
685 
686  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
687  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
688  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
689  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
690  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
691  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
692 
     // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
693  const int M ( numeric_cast<int>( A.rows() ) );
694  const int N ( numeric_cast<int>( A.columns() ) );
     // The leading dimension passed to BLAS is the matrix spacing, not the column count.
695  const int lda( numeric_cast<int>( A.spacing() ) );
     // The complex gemv routines take alpha and beta by address, so named constants are needed.
696  const complex<double> alpha( 1.0, 0.0 );
697  const complex<double> beta ( 0.0, 0.0 );
698 
     // Row-major, transposed (not conjugated) gemv with alpha = 1 and beta = 0.
699  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
700  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
701  }
703 #endif
704  //**********************************************************************************************
705 
706  //**Assignment to sparse vectors****************************************************************
// Assigns the vector/matrix product to a sparse vector.
//   lhs: target sparse vector; its size must equal rhs.size() (internal assertion only)
//   rhs: the multiplication expression to evaluate
// The expression is first evaluated into a temporary of type ResultType, which is then
// assigned to the target.
719  template< typename VT1 > // Type of the target sparse vector
720  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
721  {
723 
726  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
727 
728  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
729 
730  const ResultType tmp( rhs );
731  assign( ~lhs, tmp );
732  }
734  //**********************************************************************************************
735 
736  //**Addition assignment to dense vectors********************************************************
// Adds the vector/matrix product to a dense vector.
//   lhs: target dense vector; its size must equal rhs.size() (internal assertion only)
//   rhs: the multiplication expression to evaluate
// NOTE(review): the RT typedef and TDVECDMATMULT_THRESHOLD are not defined in this listing.
749  template< typename VT1 > // Type of the target dense vector
750  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
751  {
753 
754  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
755 
     // An empty matrix contributes nothing, so the target is left untouched.
756  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
757  return;
758  }
759 
760  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
761  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
762 
763  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
764  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
765  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
766  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
767 
     // The default kernel is used for a computation operand with 'evaluate' unset, or for
     // problems with fewer than TDVECDMATMULT_THRESHOLD matrix elements; otherwise the BLAS
     // kernel selection runs.
768  if( ( IsComputation<MT>::value && !evaluate ) ||
769  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
770  TDVecDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
771  else
772  TDVecDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
773  }
775  //**********************************************************************************************
776 
777  //**Default addition assignment to dense vectors************************************************
// Scalar default kernel for y += x * A; selected when UseVectorizedDefaultKernel is false.
//   y: target vector (accumulated into)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
// Unlike the plain assignment kernel, every row, including row 0, is accumulated with '+='.
791  template< typename VT1 // Type of the left-hand side target vector
792  , typename VT2 // Type of the left-hand side vector operand
793  , typename MT1 > // Type of the right-hand side matrix operand
794  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
795  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
796  {
797  const size_t M( A.rows() );
798  const size_t N( A.columns() );
799 
800  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
     // jend is N rounded down to an even number: the bound of the two-column unrolled loop.
801  const size_t jend( N & size_t(-2) );
802 
803  for( size_t i=0UL; i<M; ++i ) {
804  for( size_t j=0UL; j<jend; j+=2UL ) {
805  y[j ] += x[i] * A(i,j );
806  y[j+1UL] += x[i] * A(i,j+1UL);
807  }
     // Odd N: handle the last column separately.
808  if( jend < N ) {
809  y[jend] += x[i] * A(i,jend);
810  }
811  }
812  }
814  //**********************************************************************************************
815 
816  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for y += x * A; selected when UseVectorizedDefaultKernel is true.
//   y: target vector (read through load(), written back through store())
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
// Columns are processed in blocks of 8, 4, 3, 2 and finally 1 step(s) of IT::size. For every
// block the accumulators start from the current contents of y, are summed over all rows of A,
// and are stored back into y.
// NOTE(review): N is A.spacing(), not A.columns(), so y is read and written up to the spacing
// -- presumably both operands are padded to a multiple of IT::size; confirm.
830  template< typename VT1 // Type of the left-hand side target vector
831  , typename VT2 // Type of the left-hand side vector operand
832  , typename MT1 > // Type of the right-hand side matrix operand
833  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
834  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
835  {
836  typedef IntrinsicTrait<ElementType> IT;
837 
838  const size_t M( A.rows() );
839  const size_t N( A.spacing() );
840 
     // j is shared by all loops below: each tier continues where the previous one stopped.
841  size_t j( 0UL );
842 
     // Blocks of 8 * IT::size columns.
843  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
844  IntrinsicType xmm1( load( &y[j ] ) );
845  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
846  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
847  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
848  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
849  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
850  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
851  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
852  for( size_t i=0UL; i<M; ++i ) {
853  const IntrinsicType x1( set( x[i] ) );
854  xmm1 = xmm1 + x1 * A.get(i,j );
855  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
856  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
857  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
858  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
859  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
860  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
861  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
862  }
863  store( &y[j ], xmm1 );
864  store( &y[j+IT::size ], xmm2 );
865  store( &y[j+IT::size*2UL], xmm3 );
866  store( &y[j+IT::size*3UL], xmm4 );
867  store( &y[j+IT::size*4UL], xmm5 );
868  store( &y[j+IT::size*5UL], xmm6 );
869  store( &y[j+IT::size*6UL], xmm7 );
870  store( &y[j+IT::size*7UL], xmm8 );
871  }
     // Blocks of 4 * IT::size columns.
872  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
873  IntrinsicType xmm1( load( &y[j ] ) );
874  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
875  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
876  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
877  for( size_t i=0UL; i<M; ++i ) {
878  const IntrinsicType x1( set( x[i] ) );
879  xmm1 = xmm1 + x1 * A.get(i,j );
880  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
881  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
882  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
883  }
884  store( &y[j ], xmm1 );
885  store( &y[j+IT::size ], xmm2 );
886  store( &y[j+IT::size*2UL], xmm3 );
887  store( &y[j+IT::size*3UL], xmm4 );
888  }
     // Blocks of 3 * IT::size columns.
889  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
890  IntrinsicType xmm1( load( &y[j ] ) );
891  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
892  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
893  for( size_t i=0UL; i<M; ++i ) {
894  const IntrinsicType x1( set( x[i] ) );
895  xmm1 = xmm1 + x1 * A.get(i,j );
896  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
897  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
898  }
899  store( &y[j ], xmm1 );
900  store( &y[j+IT::size ], xmm2 );
901  store( &y[j+IT::size*2UL], xmm3 );
902  }
     // Blocks of 2 * IT::size columns.
903  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
904  IntrinsicType xmm1( load( &y[j ] ) );
905  IntrinsicType xmm2( load( &y[j+IT::size] ) );
906  for( size_t i=0UL; i<M; ++i ) {
907  const IntrinsicType x1( set( x[i] ) );
908  xmm1 = xmm1 + x1 * A.get(i,j );
909  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
910  }
911  store( &y[j ], xmm1 );
912  store( &y[j+IT::size], xmm2 );
913  }
     // At most one final step remains; it is handled as a single accumulator.
914  if( j < N ) {
915  IntrinsicType xmm1( load( &y[j] ) );
916  for( size_t i=0UL; i<M; ++i ) {
917  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
918  }
919  store( &y[j], xmm1 );
920  }
921  }
923  //**********************************************************************************************
924 
925  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS addition-assignment kernel selection: enabled when
// UseDefaultKernel is true, it simply forwards to selectDefaultAddAssignKernel( y, x, A ).
939  template< typename VT1 // Type of the left-hand side target vector
940  , typename VT2 // Type of the left-hand side vector operand
941  , typename MT1 > // Type of the right-hand side matrix operand
942  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
943  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
944  {
945  selectDefaultAddAssignKernel( y, x, A );
946  }
948  //**********************************************************************************************
949 
950  //**BLAS-based addition assignment to dense vectors (single precision)**************************
951 #if BLAZE_BLAS_MODE
952 
// Single precision BLAS kernel for y += x * A; enabled when UseSinglePrecisionKernel is true
// and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (accumulated into)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
965  template< typename VT1 // Type of the left-hand side target vector
966  , typename VT2 // Type of the left-hand side vector operand
967  , typename MT1 > // Type of the right-hand side matrix operand
968  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
969  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
970  {
971  using boost::numeric_cast;
972 
973  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
974  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
975  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
976 
     // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
977  const int M ( numeric_cast<int>( A.rows() ) );
978  const int N ( numeric_cast<int>( A.columns() ) );
     // The leading dimension passed to BLAS is the matrix spacing, not the column count.
979  const int lda( numeric_cast<int>( A.spacing() ) );
980 
     // Same call as the plain assignment kernel, but with beta = 1 so that the product is added
     // to the existing contents of y.
981  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
982  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
983  }
985 #endif
986  //**********************************************************************************************
987 
988  //**BLAS-based addition assignment to dense vectors (double precision)**************************
989 #if BLAZE_BLAS_MODE
990 
// Double precision BLAS kernel for y += x * A; enabled when UseDoublePrecisionKernel is true
// and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (accumulated into)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
1003  template< typename VT1 // Type of the left-hand side target vector
1004  , typename VT2 // Type of the left-hand side vector operand
1005  , typename MT1 > // Type of the right-hand side matrix operand
1006  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1007  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1008  {
1009  using boost::numeric_cast;
1010 
1011  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1012  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1013  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1014 
      // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
1015  const int M ( numeric_cast<int>( A.rows() ) );
1016  const int N ( numeric_cast<int>( A.columns() ) );
      // The leading dimension passed to BLAS is the matrix spacing, not the column count.
1017  const int lda( numeric_cast<int>( A.spacing() ) );
1018 
      // Same call as the plain assignment kernel, but with beta = 1 so that the product is added
      // to the existing contents of y.
1019  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1020  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1021  }
1023 #endif
1024  //**********************************************************************************************
1025 
1026  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1027 #if BLAZE_BLAS_MODE
1028 
// Single precision complex BLAS kernel for y += x * A; enabled when
// UseSinglePrecisionComplexKernel is true and only compiled if BLAZE_BLAS_MODE is nonzero.
//   y: target vector (accumulated into)
//   x: left-hand side vector operand
//   A: right-hand side matrix operand
1041  template< typename VT1 // Type of the left-hand side target vector
1042  , typename VT2 // Type of the left-hand side vector operand
1043  , typename MT1 > // Type of the right-hand side matrix operand
1044  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1045  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1046  {
1047  using boost::numeric_cast;
1048 
1049  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1050  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1051  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1052  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1053  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1054  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1055 
      // CBLAS takes int dimensions; the sizes are converted through boost::numeric_cast.
1056  const int M ( numeric_cast<int>( A.rows() ) );
1057  const int N ( numeric_cast<int>( A.columns() ) );
      // The leading dimension passed to BLAS is the matrix spacing, not the column count.
1058  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines take alpha and beta by address; beta = 1 keeps the existing y.
1059  const complex<float> alpha( 1.0F, 0.0F );
1060  const complex<float> beta ( 1.0F, 0.0F );
1061 
      // Row-major, transposed (not conjugated) gemv with alpha = 1 and beta = 1.
1062  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1063  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1064  }
1066 #endif
1067  //**********************************************************************************************
1068 
1069  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1070 #if BLAZE_BLAS_MODE
1071 
/*!\brief BLAS-based addition assignment of a transpose dense vector-dense matrix multiplication
//        for double precision complex operands (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is only enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds. The work
// is delegated to cblas_zgemv with alpha = (1,0) and beta = (1,0), i.e. the product is
// accumulated onto the current contents of \a y.
*/
1084  template< typename VT1 // Type of the left-hand side target vector
1085  , typename VT2 // Type of the left-hand side vector operand
1086  , typename MT1 > // Type of the right-hand side matrix operand
1087  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1088  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1089  {
1090  using boost::numeric_cast;
1091 
1092  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1093  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1094  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1095  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1096  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1097  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1098 
      // CBLAS takes its dimensions as int; the leading dimension is the matrix spacing.
1099  const int M ( numeric_cast<int>( A.rows() ) );
1100  const int N ( numeric_cast<int>( A.columns() ) );
1101  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv interface receives its scaling factors by address.
1102  const complex<double> alpha( 1.0, 0.0 );
1103  const complex<double> beta ( 1.0, 0.0 );
1104 
1105  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1106  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1107  }
1109 #endif
1110  //**********************************************************************************************
1111 
1112  //**Addition assignment to sparse vectors*******************************************************
1113  // No special implementation for the addition assignment to sparse vectors.
1114  //**********************************************************************************************
1115 
1116  //**Subtraction assignment to dense vectors*****************************************************
/*!\brief Subtraction assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// The function returns without touching \a lhs when the matrix operand has no rows or no
// columns. Otherwise both operands are evaluated into local objects and one of two kernels is
// chosen: the default kernel when the matrix operand is a computation that is not flagged by
// \a evaluate, or when rows*columns is below TDVECDMATMULT_THRESHOLD; the BLAS kernel otherwise.
*/
1129  template< typename VT1 > // Type of the target dense vector
1130  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1131  {
1133 
1134  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1135 
      // An empty matrix operand contributes nothing to the target vector.
1136  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1137  return;
1138  }
1139 
1140  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1141  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1142 
1143  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1144  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1145  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1146  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1147 
      // Kernel selection: small problems and non-evaluated computations use the default kernel.
1148  if( ( IsComputation<MT>::value && !evaluate ) ||
1149  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1150  TDVecDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1151  else
1152  TDVecDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1153  }
1155  //**********************************************************************************************
1156 
1157  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default (non-vectorized) subtraction assignment of a transpose dense vector-dense
//        matrix multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> does not hold. For
// every row \a i of \a A, the product x[i]*A(i,j) is subtracted from y[j]. The column loop is
// unrolled by two, with a single trailing element handled separately when the column count
// is odd.
*/
1171  template< typename VT1 // Type of the left-hand side target vector
1172  , typename VT2 // Type of the left-hand side vector operand
1173  , typename MT1 > // Type of the right-hand side matrix operand
1174  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1175  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1176  {
1177  const size_t M( A.rows() );
1178  const size_t N( A.columns() );
1179 
      // jend is N rounded down to the next even number (clears the lowest bit).
1180  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1181  const size_t jend( N & size_t(-2) );
1182 
1183  for( size_t i=0UL; i<M; ++i ) {
1184  for( size_t j=0UL; j<jend; j+=2UL ) {
1185  y[j ] -= x[i] * A(i,j );
1186  y[j+1UL] -= x[i] * A(i,j+1UL);
1187  }
      // Remaining last column in case of an odd column count.
1188  if( jend < N ) {
1189  y[jend] -= x[i] * A(i,jend);
1190  }
1191  }
1192  }
1194  //**********************************************************************************************
1195 
1196  //**Vectorized default subtraction assignment to dense vectors**********************************
/*!\brief Vectorized default subtraction assignment of a transpose dense vector-dense matrix
//        multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds. The columns are
// processed in groups of 8, 4, 3, 2 and finally 1 intrinsic vectors of IT::size elements each.
// For every group the current values of \a y are loaded into accumulators, x[i]*A(i,j..) is
// subtracted for all rows \a i, and the accumulators are stored back to \a y.
//
// NOTE(review): the column bound is A.spacing(), not A.columns(), so loads and stores on \a y
// and \a A may reach past the logical column count. This presumably relies on padded storage of
// both operands - confirm against the storage classes.
*/
1210  template< typename VT1 // Type of the left-hand side target vector
1211  , typename VT2 // Type of the left-hand side vector operand
1212  , typename MT1 > // Type of the right-hand side matrix operand
1213  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1214  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1215  {
1216  typedef IntrinsicTrait<ElementType> IT;
1217 
1218  const size_t M( A.rows() );
1219  const size_t N( A.spacing() );
1220 
1221  size_t j( 0UL );
1222 
      // Groups of eight intrinsic vectors.
1223  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1224  IntrinsicType xmm1( load( &y[j ] ) );
1225  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1226  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1227  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1228  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
1229  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
1230  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
1231  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
1232  for( size_t i=0UL; i<M; ++i ) {
      // x[i] is broadcast once per row and reused for all accumulators.
1233  const IntrinsicType x1( set( x[i] ) );
1234  xmm1 = xmm1 - x1 * A.get(i,j );
1235  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1236  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1237  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1238  xmm5 = xmm5 - x1 * A.get(i,j+IT::size*4UL);
1239  xmm6 = xmm6 - x1 * A.get(i,j+IT::size*5UL);
1240  xmm7 = xmm7 - x1 * A.get(i,j+IT::size*6UL);
1241  xmm8 = xmm8 - x1 * A.get(i,j+IT::size*7UL);
1242  }
1243  store( &y[j ], xmm1 );
1244  store( &y[j+IT::size ], xmm2 );
1245  store( &y[j+IT::size*2UL], xmm3 );
1246  store( &y[j+IT::size*3UL], xmm4 );
1247  store( &y[j+IT::size*4UL], xmm5 );
1248  store( &y[j+IT::size*5UL], xmm6 );
1249  store( &y[j+IT::size*6UL], xmm7 );
1250  store( &y[j+IT::size*7UL], xmm8 );
1251  }
      // Groups of four intrinsic vectors.
1252  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1253  IntrinsicType xmm1( load( &y[j ] ) );
1254  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1255  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1256  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1257  for( size_t i=0UL; i<M; ++i ) {
1258  const IntrinsicType x1( set( x[i] ) );
1259  xmm1 = xmm1 - x1 * A.get(i,j );
1260  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1261  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1262  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1263  }
1264  store( &y[j ], xmm1 );
1265  store( &y[j+IT::size ], xmm2 );
1266  store( &y[j+IT::size*2UL], xmm3 );
1267  store( &y[j+IT::size*3UL], xmm4 );
1268  }
      // Groups of three intrinsic vectors.
1269  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
1270  IntrinsicType xmm1( load( &y[j ] ) );
1271  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1272  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1273  for( size_t i=0UL; i<M; ++i ) {
1274  const IntrinsicType x1( set( x[i] ) );
1275  xmm1 = xmm1 - x1 * A.get(i,j );
1276  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1277  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1278  }
1279  store( &y[j ], xmm1 );
1280  store( &y[j+IT::size ], xmm2 );
1281  store( &y[j+IT::size*2UL], xmm3 );
1282  }
      // Groups of two intrinsic vectors.
1283  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1284  IntrinsicType xmm1( load( &y[j ] ) );
1285  IntrinsicType xmm2( load( &y[j+IT::size] ) );
1286  for( size_t i=0UL; i<M; ++i ) {
1287  const IntrinsicType x1( set( x[i] ) );
1288  xmm1 = xmm1 - x1 * A.get(i,j );
1289  xmm2 = xmm2 - x1 * A.get(i,j+IT::size);
1290  }
1291  store( &y[j ], xmm1 );
1292  store( &y[j+IT::size], xmm2 );
1293  }
      // At most one intrinsic vector is left at this point.
1294  if( j < N ) {
1295  IntrinsicType xmm1( load( &y[j] ) );
1296  for( size_t i=0UL; i<M; ++i ) {
1297  xmm1 = xmm1 - set( x[i] ) * A.get(i,j);
1298  }
1299  store( &y[j], xmm1 );
1300  }
1301  }
1303  //**********************************************************************************************
1304 
1305  //**BLAS-based subtraction assignment to dense vectors (default)********************************
/*!\brief Fallback for the BLAS-based subtraction assignment of a transpose dense vector-dense
//        matrix multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is selected when UseDefaultKernel<VT1,VT2,MT1> holds. It performs no BLAS call
// and simply forwards to selectDefaultSubAssignKernel().
*/
1319  template< typename VT1 // Type of the left-hand side target vector
1320  , typename VT2 // Type of the left-hand side vector operand
1321  , typename MT1 > // Type of the right-hand side matrix operand
1322  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1323  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1324  {
1325  selectDefaultSubAssignKernel( y, x, A );
1326  }
1328  //**********************************************************************************************
1329 
1330  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1331 #if BLAZE_BLAS_MODE
1332 
/*!\brief BLAS-based subtraction assignment of a transpose dense vector-dense matrix
//        multiplication for single precision operands (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is only enabled when UseSinglePrecisionKernel<VT1,VT2,MT1> holds. The work is
// delegated to cblas_sgemv with alpha = -1.0F and beta = 1.0F, i.e. the product is subtracted
// from the current contents of \a y.
*/
1345  template< typename VT1 // Type of the left-hand side target vector
1346  , typename VT2 // Type of the left-hand side vector operand
1347  , typename MT1 > // Type of the right-hand side matrix operand
1348  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1349  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1350  {
1351  using boost::numeric_cast;
1352 
1353  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1354  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1355  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1356 
      // CBLAS takes its dimensions as int; the leading dimension is the matrix spacing.
1357  const int M ( numeric_cast<int>( A.rows() ) );
1358  const int N ( numeric_cast<int>( A.columns() ) );
1359  const int lda( numeric_cast<int>( A.spacing() ) );
1360 
      // alpha = -1 turns the gemv accumulation into a subtraction.
1361  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1362  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1363  }
1365 #endif
1366  //**********************************************************************************************
1367 
1368  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1369 #if BLAZE_BLAS_MODE
1370 
/*!\brief BLAS-based subtraction assignment of a transpose dense vector-dense matrix
//        multiplication for double precision operands (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is only enabled when UseDoublePrecisionKernel<VT1,VT2,MT1> holds. The work is
// delegated to cblas_dgemv with alpha = -1.0 and beta = 1.0, i.e. the product is subtracted
// from the current contents of \a y.
*/
1383  template< typename VT1 // Type of the left-hand side target vector
1384  , typename VT2 // Type of the left-hand side vector operand
1385  , typename MT1 > // Type of the right-hand side matrix operand
1386  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1387  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1388  {
1389  using boost::numeric_cast;
1390 
1391  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1392  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1393  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1394 
      // CBLAS takes its dimensions as int; the leading dimension is the matrix spacing.
1395  const int M ( numeric_cast<int>( A.rows() ) );
1396  const int N ( numeric_cast<int>( A.columns() ) );
1397  const int lda( numeric_cast<int>( A.spacing() ) );
1398 
      // alpha = -1 turns the gemv accumulation into a subtraction.
1399  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1400  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1401  }
1403 #endif
1404  //**********************************************************************************************
1405 
1406  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1407 #if BLAZE_BLAS_MODE
1408 
/*!\brief BLAS-based subtraction assignment of a transpose dense vector-dense matrix
//        multiplication for single precision complex operands (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is only enabled when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds. The work
// is delegated to cblas_cgemv with alpha = (-1,0) and beta = (1,0), i.e. the product is
// subtracted from the current contents of \a y.
*/
1421  template< typename VT1 // Type of the left-hand side target vector
1422  , typename VT2 // Type of the left-hand side vector operand
1423  , typename MT1 > // Type of the right-hand side matrix operand
1424  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1425  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1426  {
1427  using boost::numeric_cast;
1428 
1429  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1430  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1431  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1432  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1433  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1434  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1435 
      // CBLAS takes its dimensions as int; the leading dimension is the matrix spacing.
1436  const int M ( numeric_cast<int>( A.rows() ) );
1437  const int N ( numeric_cast<int>( A.columns() ) );
1438  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = (-1,0) turns the gemv accumulation into a subtraction; passed by address.
1439  const complex<float> alpha( -1.0F, 0.0F );
1440  const complex<float> beta ( 1.0F, 0.0F );
1441 
1442  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1443  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1444  }
1446 #endif
1447  //**********************************************************************************************
1448 
1449  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1450 #if BLAZE_BLAS_MODE
1451 
/*!\brief BLAS-based subtraction assignment of a transpose dense vector-dense matrix
//        multiplication for double precision complex operands (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is only enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds. The work
// is delegated to cblas_zgemv with alpha = (-1,0) and beta = (1,0), i.e. the product is
// subtracted from the current contents of \a y.
*/
1464  template< typename VT1 // Type of the left-hand side target vector
1465  , typename VT2 // Type of the left-hand side vector operand
1466  , typename MT1 > // Type of the right-hand side matrix operand
1467  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1468  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1469  {
1470  using boost::numeric_cast;
1471 
1472  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1473  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1474  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1475  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1476  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1477  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1478 
      // CBLAS takes its dimensions as int; the leading dimension is the matrix spacing.
1479  const int M ( numeric_cast<int>( A.rows() ) );
1480  const int N ( numeric_cast<int>( A.columns() ) );
1481  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = (-1,0) turns the gemv accumulation into a subtraction; passed by address.
1482  const complex<double> alpha( -1.0, 0.0 );
1483  const complex<double> beta ( 1.0, 0.0 );
1484 
1485  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1486  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1487  }
1489 #endif
1490  //**********************************************************************************************
1491 
1492  //**Subtraction assignment to sparse vectors****************************************************
1493  // No special implementation for the subtraction assignment to sparse vectors.
1494  //**********************************************************************************************
1495 
1496  //**Multiplication assignment to dense vectors**************************************************
/*!\brief Multiplication assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose dense vector (\f$ \vec{y}^T*=\vec{x}^T*A \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression for the multiplication.
// \return void
//
// There is no dedicated kernel for this operation: the expression is first evaluated into a
// temporary of type ResultType, and the temporary is then passed on to the multAssign() overload
// found for that type.
*/
1509  template< typename VT1 > // Type of the target dense vector
1510  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1511  {
1513 
1516  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1517 
1518  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1519 
      // Evaluate the whole expression once, then multiply with the evaluated result.
1520  const ResultType tmp( rhs );
1521  multAssign( ~lhs, tmp );
1522  }
1524  //**********************************************************************************************
1525 
1526  //**Multiplication assignment to sparse vectors*******************************************************
1527  // No special implementation for the multiplication assignment to sparse vectors.
1528  //**********************************************************************************************
1529 
1530  //**Compile time checks*************************************************************************
1537  //**********************************************************************************************
1538 };
1539 //*************************************************************************************************
1540 
1541 
1542 
1543 
1544 //=================================================================================================
1545 //
1546 // DVECSCALARMULTEXPR SPECIALIZATION
1547 //
1548 //=================================================================================================
1549 
1550 //*************************************************************************************************
1558 template< typename VT // Type of the left-hand side dense vector
1559  , typename MT // Type of the right-hand side dense matrix
1560  , typename ST > // Type of the side scalar value
1561 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1562  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1563  , private Expression
1564  , private Computation
1565 {
1566  private:
1567  //**Type definitions****************************************************************************
1568  typedef TDVecDMatMultExpr<VT,MT> VMM; //!< Type of the wrapped vector/matrix multiplication expression.
1569  typedef typename VMM::ResultType RES; //!< Result type of the wrapped multiplication expression.
1570  typedef typename VT::ResultType VRT; //!< Result type of the left-hand side dense vector expression.
1571  typedef typename MT::ResultType MRT; //!< Result type of the right-hand side dense matrix expression.
1572  typedef typename VRT::ElementType VET; //!< Element type of the left-hand side dense vector expression.
1573  typedef typename MRT::ElementType MET; //!< Element type of the right-hand side dense matrix expression.
1574  typedef typename VT::CompositeType VCT; //!< Composite type of the left-hand side dense vector expression.
1575  typedef typename MT::CompositeType MCT; //!< Composite type of the right-hand side dense matrix expression.
1576  //**********************************************************************************************
1577 
1578  //**********************************************************************************************
//! Compilation switch for the evaluation of the right-hand side dense matrix operand.
/*! The flag is set when the matrix type is a computation, is not vectorizable, the vector and
    matrix element types are identical, and that element type is BLAS compatible. It selects the
    type used to hold the matrix operand (see the RT typedef) and takes part in the kernel
    selection of the assign functions. */
1580  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1581  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1582  //**********************************************************************************************
1583 
1584  //**********************************************************************************************
1586 
//! Compile-time predicate for the selection of the single precision BLAS kernel.
/*! The nested \a value is set when the three types \a T1, \a T2 and \a T3 are all vectorizable
    and have \a float elements, and the scalar type \a T4 is not a complex type. */
1589  template< typename T1, typename T2, typename T3, typename T4 >
1590  struct UseSinglePrecisionKernel {
1591  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1592  IsFloat<typename T1::ElementType>::value &&
1593  IsFloat<typename T2::ElementType>::value &&
1594  IsFloat<typename T3::ElementType>::value &&
1595  !IsComplex<T4>::value };
1596  };
1597  //**********************************************************************************************
1598 
1599  //**********************************************************************************************
1601 
//! Compile-time predicate for the selection of the double precision BLAS kernel.
/*! The nested \a value is set when the three types \a T1, \a T2 and \a T3 are all vectorizable
    and have \a double elements, and the scalar type \a T4 is not a complex type. */
1604  template< typename T1, typename T2, typename T3, typename T4 >
1605  struct UseDoublePrecisionKernel {
1606  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1607  IsDouble<typename T1::ElementType>::value &&
1608  IsDouble<typename T2::ElementType>::value &&
1609  IsDouble<typename T3::ElementType>::value &&
1610  !IsComplex<T4>::value };
1611  };
1612  //**********************************************************************************************
1613 
1614  //**********************************************************************************************
1616 
//! Compile-time predicate for the selection of the single precision complex BLAS kernel.
/*! The nested \a value is set when the three types \a T1, \a T2 and \a T3 are all vectorizable
    and each has the element type complex<float>. Unlike the real-valued predicates, the scalar
    type is not part of this check. */
1619  template< typename T1, typename T2, typename T3 >
1620  struct UseSinglePrecisionComplexKernel {
1621  typedef complex<float> Type;
1622  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1623  IsSame<typename T1::ElementType,Type>::value &&
1624  IsSame<typename T2::ElementType,Type>::value &&
1625  IsSame<typename T3::ElementType,Type>::value };
1626  };
1627  //**********************************************************************************************
1628 
1629  //**********************************************************************************************
1631 
//! Compile-time predicate for the selection of the double precision complex BLAS kernel.
/*! The nested \a value is set when the three types \a T1, \a T2 and \a T3 are all vectorizable
    and each has the element type complex<double>. Unlike the real-valued predicates, the scalar
    type is not part of this check. */
1634  template< typename T1, typename T2, typename T3 >
1635  struct UseDoublePrecisionComplexKernel {
1636  typedef complex<double> Type;
1637  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1638  IsSame<typename T1::ElementType,Type>::value &&
1639  IsSame<typename T2::ElementType,Type>::value &&
1640  IsSame<typename T3::ElementType,Type>::value };
1641  };
1642  //**********************************************************************************************
1643 
1644  //**********************************************************************************************
1646 
//! Compile-time predicate for the selection of the non-BLAS fallback kernel.
/*! The nested \a value is set when BLAZE_BLAS_MODE is disabled, or when none of the four BLAS
    kernel predicates (single/double precision, real/complex) applies to the given types. */
1648  template< typename T1, typename T2, typename T3, typename T4 >
1649  struct UseDefaultKernel {
1650  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1651  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1652  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1653  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1654  };
1655  //**********************************************************************************************
1656 
1657  //**********************************************************************************************
1659 
//! Compile-time predicate for the selection of the vectorized default kernel.
/*! The nested \a value is set when \a T1, \a T2 and \a T3 are all vectorizable, all three share
    one element type that is also identical to the scalar type \a T4, and IntrinsicTrait reports
    both addition and multiplication as available for that element type. */
1662  template< typename T1, typename T2, typename T3, typename T4 >
1663  struct UseVectorizedDefaultKernel {
1664  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1665  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1666  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1667  IsSame<typename T1::ElementType,T4>::value &&
1668  IntrinsicTrait<typename T1::ElementType>::addition &&
1669  IntrinsicTrait<typename T1::ElementType>::multiplication };
1670  };
1671  //**********************************************************************************************
1672 
1673  public:
1674  //**Type definitions****************************************************************************
1675  typedef DVecScalarMultExpr<VMM,ST,true> This; //!< Type of this DVecScalarMultExpr instance.
1676  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type for expression template evaluations.
1677  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type of the result type.
1678  typedef typename ResultType::ElementType ElementType; //!< Element type of the result type.
1679  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type associated with the element type.
1680  typedef const ElementType ReturnType; //!< Return type of the subscript operator.
1681  typedef const ResultType CompositeType; //!< Data type for composite expression templates.
1682 
      //! Type of the left-hand side operand: the wrapped vector/matrix multiplication expression.
1684  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
1685 
      //! Type of the right-hand side scalar: \a ElementType or \a ST, depending on whether \a ElementType is numeric.
1687  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
1688 
      //! Type used to hold the vector operand: \a const \a VRT or \a VCT, depending on whether \a VT is a computation.
1690  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
1691 
      //! Type used to hold the matrix operand: \a const \a MRT or \a MCT, depending on the \a evaluate switch.
1693  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;
1694  //**********************************************************************************************
1695 
1696  //**Compilation flags***************************************************************************
//! Compilation switch for the expression template evaluation strategy; fixed to 0 for this expression.
1698  enum { vectorizable = 0 };
1699  //**********************************************************************************************
1700 
1701  //**Constructor*********************************************************************************
/*!\brief Constructor for the DVecScalarMultExpr specialization.
//
// \param vector The left-hand side vector/matrix multiplication expression.
// \param scalar The right-hand side scalar of the multiplication expression.
*/
1707  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1708  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1709  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1710  {}
1711  //**********************************************************************************************
1712 
1713  //**Subscript operator**************************************************************************
/*!\brief Subscript operator for direct access to a single element of the scaled expression.
//
// \param index Access index; must be smaller than size() (checked by an internal assertion).
// \return The element of the wrapped expression at \a index, multiplied by the scalar.
*/
1719  inline ReturnType operator[]( size_t index ) const {
1720  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1721  return vector_[index] * scalar_;
1722  }
1723  //**********************************************************************************************
1724 
1725  //**Size function*******************************************************************************
/*!\brief Returns the current size of the vector.
//
// \return The size of the wrapped vector/matrix multiplication expression.
*/
1730  inline size_t size() const {
1731  return vector_.size();
1732  }
1733  //**********************************************************************************************
1734 
1735  //**Left operand access*************************************************************************
/*!\brief Returns the left-hand side operand of the scaling.
//
// \return The wrapped vector/matrix multiplication expression.
*/
1740  inline LeftOperand leftOperand() const {
1741  return vector_;
1742  }
1743  //**********************************************************************************************
1744 
1745  //**Right operand access************************************************************************
/*!\brief Returns the right-hand side operand of the scaling.
//
// \return The scalar factor.
*/
1750  inline RightOperand rightOperand() const {
1751  return scalar_;
1752  }
1753  //**********************************************************************************************
1754 
1755  //**********************************************************************************************
/*!\brief Returns whether the expression can alias with the given address \a alias.
//
// \param alias The address to be checked.
// \return The result of the canAlias() query on the wrapped multiplication expression.
*/
1761  template< typename T >
1762  inline bool canAlias( const T* alias ) const {
1763  return vector_.canAlias( alias );
1764  }
1765  //**********************************************************************************************
1766 
1767  //**********************************************************************************************
/*!\brief Returns whether the expression is aliased with the given address \a alias.
//
// \param alias The address to be checked.
// \return The result of the isAliased() query on the wrapped multiplication expression.
*/
1773  template< typename T >
1774  inline bool isAliased( const T* alias ) const {
1775  return vector_.isAliased( alias );
1776  }
1777  //**********************************************************************************************
1778 
1779  private:
1780  //**Member variables****************************************************************************
1781  LeftOperand vector_; //!< Left-hand side operand: the wrapped vector/matrix multiplication expression.
1782  RightOperand scalar_; //!< Right-hand side operand: the scalar factor of the expression.
1783  //**********************************************************************************************
1784 
1785  //**Assignment to dense vectors*****************************************************************
/*!\brief Assignment of a scaled transpose dense vector-dense matrix multiplication to a
//        transpose dense vector (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
//
// The vector and matrix operands are taken from the wrapped multiplication expression. If the
// matrix has no rows, the target is reset and the function returns; if it has no columns, the
// function returns without touching the target. Otherwise both operands are evaluated into
// local objects and one of two kernels is chosen: the default kernel when the matrix operand is
// a computation that is not flagged by \a evaluate, or when rows*columns is below
// TDVECDMATMULT_THRESHOLD; the BLAS kernel otherwise. The scalar is handed to the kernel.
*/
1797  template< typename VT1 > // Type of the target dense vector
1798  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
1799  {
1801 
1802  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1803 
1804  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1805  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1806 
      // No rows: there is nothing to sum up, so the target is reset.
1807  if( right.rows() == 0UL ) {
1808  reset( ~lhs );
1809  return;
1810  }
      // No columns: nothing to assign.
1811  else if( right.columns() == 0UL ) {
1812  return;
1813  }
1814 
1815  LT x( left ); // Evaluation of the left-hand side dense vector operand
1816  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1817 
1818  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1819  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1820  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1821  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1822 
      // Kernel selection: small problems and non-evaluated computations use the default kernel.
1823  if( ( IsComputation<MT>::value && !evaluate ) ||
1824  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1825  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1826  else
1827  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1828  }
1829  //**********************************************************************************************
1830 
1831  //**Default assignment to dense vectors*********************************************************
/*!\brief Default (non-vectorized) assignment of a scaled transpose dense vector-dense matrix
//        multiplication (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not hold. The
// target is initialized from the first matrix row, the remaining rows are accumulated with a
// column loop unrolled by two, and finally every element of \a y is multiplied by \a scalar.
//
// The first row is read unconditionally, so \a A must have at least one row. The assign()
// function in this class returns early for a matrix without rows before calling the kernel.
*/
1845  template< typename VT1 // Type of the left-hand side target vector
1846  , typename VT2 // Type of the left-hand side vector operand
1847  , typename MT1 // Type of the right-hand side matrix operand
1848  , typename ST2 > // Type of the scalar value
1849  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1850  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1851  {
1852  const size_t M( A.rows() );
1853  const size_t N( A.columns() );
1854 
      // jend is N rounded down to the next even number (clears the lowest bit).
1855  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1856  const size_t jend( N & size_t(-2) );
1857 
      // Initialization of the target with the contribution of the first row.
1858  for( size_t j=0UL; j<N; ++j ) {
1859  y[j] = x[0UL] * A(0UL,j);
1860  }
      // Accumulation of all remaining rows.
1861  for( size_t i=1UL; i<M; ++i ) {
1862  for( size_t j=0UL; j<jend; j+=2UL ) {
1863  y[j ] += x[i] * A(i,j );
1864  y[j+1UL] += x[i] * A(i,j+1UL);
1865  }
1866  if( jend < N ) {
1867  y[jend] += x[i] * A(i,jend);
1868  }
1869  }
      // Final scaling of the accumulated result.
1870  for( size_t j=0UL; j<N; ++j ) {
1871  y[j] *= scalar;
1872  }
1873  }
1874  //**********************************************************************************************
1875 
1876  //**Vectorized default assignment to dense vectors**************************************************
/*!\brief Vectorized default assignment of a scaled transpose dense vector-dense matrix
//        multiplication (\f$ \vec{y}^T=s*\vec{x}^T*A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns
// are processed in groups of 8, 4, 3, 2 and finally 1 intrinsic vectors of IT::size elements
// each. For every group, x[i]*A(i,j..) is accumulated over all rows \a i and the accumulators
// are multiplied by the broadcast scalar before being stored to \a y.
//
// NOTE(review): the accumulators are default-constructed and then used as the starting value
// of the sums; this presumes that the default constructor of IntrinsicType yields zero - confirm.
// NOTE(review): the column bound is A.spacing(), not A.columns(), so loads on \a A and stores on
// \a y may reach past the logical column count; presumably this relies on padded storage -
// confirm against the storage classes.
*/
1890  template< typename VT1 // Type of the left-hand side target vector
1891  , typename VT2 // Type of the left-hand side vector operand
1892  , typename MT1 // Type of the right-hand side matrix operand
1893  , typename ST2 > // Type of the scalar value
1894  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1895  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1896  {
1897  typedef IntrinsicTrait<ElementType> IT;
1898 
1899  const size_t M( A.rows() );
1900  const size_t N( A.spacing() );
1901 
      // The scalar is broadcast once and applied at store time.
1902  const IntrinsicType factor( set( scalar ) );
1903 
1904  size_t j( 0UL );
1905 
      // Groups of eight intrinsic vectors.
1906  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1907  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1908  for( size_t i=0UL; i<M; ++i ) {
1909  const IntrinsicType x1( set( x[i] ) );
1910  xmm1 = xmm1 + x1 * A.get(i,j );
1911  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1912  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1913  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1914  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
1915  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
1916  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
1917  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
1918  }
1919  store( &y[j ], xmm1*factor );
1920  store( &y[j+IT::size ], xmm2*factor );
1921  store( &y[j+IT::size*2UL], xmm3*factor );
1922  store( &y[j+IT::size*3UL], xmm4*factor );
1923  store( &y[j+IT::size*4UL], xmm5*factor );
1924  store( &y[j+IT::size*5UL], xmm6*factor );
1925  store( &y[j+IT::size*6UL], xmm7*factor );
1926  store( &y[j+IT::size*7UL], xmm8*factor );
1927  }
      // Groups of four intrinsic vectors.
1928  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1929  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1930  for( size_t i=0UL; i<M; ++i ) {
1931  const IntrinsicType x1( set( x[i] ) );
1932  xmm1 = xmm1 + x1 * A.get(i,j );
1933  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1934  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1935  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1936  }
1937  store( &y[j ], xmm1*factor );
1938  store( &y[j+IT::size ], xmm2*factor );
1939  store( &y[j+IT::size*2UL], xmm3*factor );
1940  store( &y[j+IT::size*3UL], xmm4*factor );
1941  }
      // Groups of three intrinsic vectors.
1942  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
1943  IntrinsicType xmm1, xmm2, xmm3;
1944  for( size_t i=0UL; i<M; ++i ) {
1945  const IntrinsicType x1( set( x[i] ) );
1946  xmm1 = xmm1 + x1 * A.get(i,j );
1947  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1948  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1949  }
1950  store( &y[j ], xmm1*factor );
1951  store( &y[j+IT::size ], xmm2*factor );
1952  store( &y[j+IT::size*2UL], xmm3*factor );
1953  }
      // Groups of two intrinsic vectors.
1954  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1955  IntrinsicType xmm1, xmm2;
1956  for( size_t i=0UL; i<M; ++i ) {
1957  const IntrinsicType x1( set( x[i] ) );
1958  xmm1 = xmm1 + x1 * A.get(i,j );
1959  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
1960  }
1961  store( &y[j ], xmm1*factor );
1962  store( &y[j+IT::size], xmm2*factor );
1963  }
      // At most one intrinsic vector is left at this point.
1964  if( j < N ) {
1965  IntrinsicType xmm1;
1966  for( size_t i=0UL; i<M; ++i ) {
1967  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
1968  }
1969  store( &y[j], xmm1*factor );
1970  }
1971  }
1972  //**********************************************************************************************
1973 
1974  //**BLAS-based assignment to dense vectors (default)********************************************
1987  template< typename VT1 // Type of the left-hand side target vector
1988  , typename VT2 // Type of the left-hand side vector operand
1989  , typename MT1 // Type of the right-hand side matrix operand
1990  , typename ST2 > // Type of the scalar value
1991  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1992  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1993  {
1994  selectDefaultAssignKernel( y, x, A, scalar );
1995  }
1996  //**********************************************************************************************
1997 
1998  //**BLAS-based assignment to dense vectors (single precision)***********************************
1999 #if BLAZE_BLAS_MODE
2000 
2013  template< typename VT1 // Type of the left-hand side target vector
2014  , typename VT2 // Type of the left-hand side vector operand
2015  , typename MT1 // Type of the right-hand side matrix operand
2016  , typename ST2 > // Type of the scalar value
2017  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2018  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2019  {
2020  using boost::numeric_cast;
2021 
2022  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2023  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2024  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2025 
2026  const int M ( numeric_cast<int>( A.rows() ) );
2027  const int N ( numeric_cast<int>( A.columns() ) );
2028  const int lda( numeric_cast<int>( A.spacing() ) );
2029 
2030  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2031  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2032  }
2033 #endif
2034  //**********************************************************************************************
2035 
2036  //**BLAS-based assignment to dense vectors (double precision)***********************************
2037 #if BLAZE_BLAS_MODE
2038 
2051  template< typename VT1 // Type of the left-hand side target vector
2052  , typename VT2 // Type of the left-hand side vector operand
2053  , typename MT1 // Type of the right-hand side matrix operand
2054  , typename ST2 > // Type of the scalar value
2055  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2056  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2057  {
2058  using boost::numeric_cast;
2059 
2060  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2061  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2062  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2063 
2064  const int M ( numeric_cast<int>( A.rows() ) );
2065  const int N ( numeric_cast<int>( A.columns() ) );
2066  const int lda( numeric_cast<int>( A.spacing() ) );
2067 
2068  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2069  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2070  }
2071 #endif
2072  //**********************************************************************************************
2073 
2074  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2075 #if BLAZE_BLAS_MODE
2076 
2089  template< typename VT1 // Type of the left-hand side target vector
2090  , typename VT2 // Type of the left-hand side vector operand
2091  , typename MT1 // Type of the right-hand side matrix operand
2092  , typename ST2 > // Type of the scalar value
2093  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2094  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2095  {
2096  using boost::numeric_cast;
2097 
2098  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2099  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2100  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2102  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2103  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2104  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2105 
2106  const int M ( numeric_cast<int>( A.rows() ) );
2107  const int N ( numeric_cast<int>( A.columns() ) );
2108  const int lda( numeric_cast<int>( A.spacing() ) );
2109  const complex<float> alpha( scalar );
2110  const complex<float> beta ( 0.0F, 0.0F );
2111 
2112  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2113  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2114  }
2115 #endif
2116  //**********************************************************************************************
2117 
2118  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2119 #if BLAZE_BLAS_MODE
2120 
2133  template< typename VT1 // Type of the left-hand side target vector
2134  , typename VT2 // Type of the left-hand side vector operand
2135  , typename MT1 // Type of the right-hand side matrix operand
2136  , typename ST2 > // Type of the scalar value
2137  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2138  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2139  {
2140  using boost::numeric_cast;
2141 
2142  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2143  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2144  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2146  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2147  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2148  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2149 
2150  const int M ( numeric_cast<int>( A.rows() ) );
2151  const int N ( numeric_cast<int>( A.columns() ) );
2152  const int lda( numeric_cast<int>( A.spacing() ) );
2153  const complex<double> alpha( scalar );
2154  const complex<double> beta ( 0.0, 0.0 );
2155 
2156  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2157  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2158  }
2159 #endif
2160  //**********************************************************************************************
2161 
2162  //**Assignment to sparse vectors****************************************************************
2174  template< typename VT1 > // Type of the target sparse vector
2175  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2176  {
2178 
2181  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2182 
2183  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2184 
2185  const ResultType tmp( rhs );
2186  assign( ~lhs, tmp );
2187  }
2188  //**********************************************************************************************
2189 
2190  //**Addition assignment to dense vectors********************************************************
2202  template< typename VT1 > // Type of the target dense vector
2203  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2204  {
2206 
2207  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2208 
2209  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2210  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2211 
2212  if( right.rows() == 0UL || right.columns() == 0UL ) {
2213  return;
2214  }
2215 
2216  LT x( left ); // Evaluation of the left-hand side dense vector operand
2217  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2218 
2219  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2220  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2221  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2222  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2223 
2224  if( ( IsComputation<MT>::value && !evaluate ) ||
2225  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2226  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2227  else
2228  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2229  }
2230  //**********************************************************************************************
2231 
2232  //**Default addition assignment to dense vectors************************************************
2246  template< typename VT1 // Type of the left-hand side target vector
2247  , typename VT2 // Type of the left-hand side vector operand
2248  , typename MT1 // Type of the right-hand side matrix operand
2249  , typename ST2 > // Type of the scalar value
2250  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2251  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2252  {
2253  y.addAssign( x * A * scalar );
2254  }
2255  //**********************************************************************************************
2256 
2257  //**Vectorized default addition assignment to dense vectors*************************************
2271  template< typename VT1 // Type of the left-hand side target vector
2272  , typename VT2 // Type of the left-hand side vector operand
2273  , typename MT1 // Type of the right-hand side matrix operand
2274  , typename ST2 > // Type of the scalar value
2275  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2276  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2277  {
2278  typedef IntrinsicTrait<ElementType> IT;
2279 
2280  const size_t M( A.rows() );
2281  const size_t N( A.spacing() );
2282 
2283  const IntrinsicType factor( set( scalar ) );
2284 
2285  size_t j( 0UL );
2286 
2287  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2288  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2289  for( size_t i=0UL; i<M; ++i ) {
2290  const IntrinsicType x1( set( x[i] ) );
2291  xmm1 = xmm1 + x1 * A.get(i,j );
2292  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2293  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2294  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2295  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2296  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2297  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2298  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2299  }
2300  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2301  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2302  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2303  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2304  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) + xmm5*factor );
2305  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) + xmm6*factor );
2306  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) + xmm7*factor );
2307  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) + xmm8*factor );
2308  }
2309  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2310  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2311  for( size_t i=0UL; i<M; ++i ) {
2312  const IntrinsicType x1( set( x[i] ) );
2313  xmm1 = xmm1 + x1 * A.get(i,j );
2314  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2315  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2316  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2317  }
2318  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2319  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2320  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2321  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2322  }
2323  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
2324  IntrinsicType xmm1, xmm2, xmm3;
2325  for( size_t i=0UL; i<M; ++i ) {
2326  const IntrinsicType x1( set( x[i] ) );
2327  xmm1 = xmm1 + x1 * A.get(i,j );
2328  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2329  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2330  }
2331  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2332  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2333  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2334  }
2335  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2336  IntrinsicType xmm1, xmm2;
2337  for( size_t i=0UL; i<M; ++i ) {
2338  const IntrinsicType x1( set( x[i] ) );
2339  xmm1 = xmm1 + x1 * A.get(i,j );
2340  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2341  }
2342  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2343  store( &y[j+IT::size], load( &y[j+IT::size] ) + xmm2*factor );
2344  }
2345  if( j < N ) {
2346  IntrinsicType xmm1;
2347  for( size_t i=0UL; i<M; ++i ) {
2348  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2349  }
2350  store( &y[j], load( &y[j] ) + xmm1*factor );
2351  }
2352  }
2353  //**********************************************************************************************
2354 
2355  //**BLAS-based addition assignment to dense vectors (default)***********************************
2369  template< typename VT1 // Type of the left-hand side target vector
2370  , typename VT2 // Type of the left-hand side vector operand
2371  , typename MT1 // Type of the right-hand side matrix operand
2372  , typename ST2 > // Type of the scalar value
2373  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2374  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2375  {
2376  selectDefaultAddAssignKernel( y, x, A, scalar );
2377  }
2378  //**********************************************************************************************
2379 
2380  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2381 #if BLAZE_BLAS_MODE
2382 
2395  template< typename VT1 // Type of the left-hand side target vector
2396  , typename VT2 // Type of the left-hand side vector operand
2397  , typename MT1 // Type of the right-hand side matrix operand
2398  , typename ST2 > // Type of the scalar value
2399  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2400  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2401  {
2402  using boost::numeric_cast;
2403 
2404  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2405  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2406  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2407 
2408  const int M ( numeric_cast<int>( A.rows() ) );
2409  const int N ( numeric_cast<int>( A.columns() ) );
2410  const int lda( numeric_cast<int>( A.spacing() ) );
2411 
2412  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2413  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2414  }
2415 #endif
2416  //**********************************************************************************************
2417 
2418  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2419 #if BLAZE_BLAS_MODE
2420 
2433  template< typename VT1 // Type of the left-hand side target vector
2434  , typename VT2 // Type of the left-hand side vector operand
2435  , typename MT1 // Type of the right-hand side matrix operand
2436  , typename ST2 > // Type of the scalar value
2437  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2438  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2439  {
2440  using boost::numeric_cast;
2441 
2442  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2443  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2444  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2445 
2446  const int M ( numeric_cast<int>( A.rows() ) );
2447  const int N ( numeric_cast<int>( A.columns() ) );
2448  const int lda( numeric_cast<int>( A.spacing() ) );
2449 
2450  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2451  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2452  }
2453 #endif
2454  //**********************************************************************************************
2455 
2456  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2457 #if BLAZE_BLAS_MODE
2458 
2471  template< typename VT1 // Type of the left-hand side target vector
2472  , typename VT2 // Type of the left-hand side vector operand
2473  , typename MT1 // Type of the right-hand side matrix operand
2474  , typename ST2 > // Type of the scalar value
2475  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2476  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2477  {
2478  using boost::numeric_cast;
2479 
2480  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2481  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2484  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2485  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2486  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2487 
2488  const int M ( numeric_cast<int>( A.rows() ) );
2489  const int N ( numeric_cast<int>( A.columns() ) );
2490  const int lda( numeric_cast<int>( A.spacing() ) );
2491  const complex<float> alpha( scalar );
2492  const complex<float> beta ( 1.0F, 0.0F );
2493 
2494  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2495  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2496  }
2497 #endif
2498  //**********************************************************************************************
2499 
2500  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2501 #if BLAZE_BLAS_MODE
2502 
2515  template< typename VT1 // Type of the left-hand side target vector
2516  , typename VT2 // Type of the left-hand side vector operand
2517  , typename MT1 // Type of the right-hand side matrix operand
2518  , typename ST2 > // Type of the scalar value
2519  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2520  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2521  {
2522  using boost::numeric_cast;
2523 
2524  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2525  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2526  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2528  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2529  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2530  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2531 
2532  const int M ( numeric_cast<int>( A.rows() ) );
2533  const int N ( numeric_cast<int>( A.columns() ) );
2534  const int lda( numeric_cast<int>( A.spacing() ) );
2535  const complex<double> alpha( scalar );
2536  const complex<double> beta ( 1.0, 0.0 );
2537 
2538  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2539  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2540  }
2541 #endif
2542  //**********************************************************************************************
2543 
2544  //**Addition assignment to sparse vectors*******************************************************
2545  // No special implementation for the addition assignment to sparse vectors.
2546  //**********************************************************************************************
2547 
2548  //**Subtraction assignment to dense vectors*****************************************************
2560  template< typename VT1 > // Type of the target dense vector
2561  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2562  {
2564 
2565  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2566 
2567  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2568  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2569 
2570  if( right.rows() == 0UL || right.columns() == 0UL ) {
2571  return;
2572  }
2573 
2574  LT x( left ); // Evaluation of the left-hand side dense vector operand
2575  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2576 
2577  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2578  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2579  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2580  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2581 
2582  if( ( IsComputation<MT>::value && !evaluate ) ||
2583  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2584  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2585  else
2586  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2587  }
2588  //**********************************************************************************************
2589 
2590  //**Default subtraction assignment to dense vectors*********************************************
2604  template< typename VT1 // Type of the left-hand side target vector
2605  , typename VT2 // Type of the left-hand side vector operand
2606  , typename MT1 // Type of the right-hand side matrix operand
2607  , typename ST2 > // Type of the scalar value
2608  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2609  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2610  {
2611  y.subAssign( x * A * scalar );
2612  }
2613  //**********************************************************************************************
2614 
2615  //**Vectorized default subtraction assignment to dense vectors**********************************
2629  template< typename VT1 // Type of the left-hand side target vector
2630  , typename VT2 // Type of the left-hand side vector operand
2631  , typename MT1 // Type of the right-hand side matrix operand
2632  , typename ST2 > // Type of the scalar value
2633  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2634  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2635  {
2636  typedef IntrinsicTrait<ElementType> IT;
2637 
2638  const size_t M( A.rows() );
2639  const size_t N( A.spacing() );
2640 
2641  const IntrinsicType factor( set( scalar ) );
2642 
2643  size_t j( 0UL );
2644 
2645  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2646  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2647  for( size_t i=0UL; i<M; ++i ) {
2648  const IntrinsicType x1( set( x[i] ) );
2649  xmm1 = xmm1 + x1 * A.get(i,j );
2650  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2651  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2652  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2653  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2654  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2655  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2656  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2657  }
2658  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2659  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2660  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2661  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2662  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) - xmm5*factor );
2663  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) - xmm6*factor );
2664  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) - xmm7*factor );
2665  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) - xmm8*factor );
2666  }
2667  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2668  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2669  for( size_t i=0UL; i<M; ++i ) {
2670  const IntrinsicType x1( set( x[i] ) );
2671  xmm1 = xmm1 + x1 * A.get(i,j );
2672  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2673  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2674  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2675  }
2676  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2677  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2678  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2679  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2680  }
2681  for( ; (j+IT::size*3UL) <= N; j+=IT::size*3UL ) {
2682  IntrinsicType xmm1, xmm2, xmm3;
2683  for( size_t i=0UL; i<M; ++i ) {
2684  const IntrinsicType x1( set( x[i] ) );
2685  xmm1 = xmm1 + x1 * A.get(i,j );
2686  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2687  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2688  }
2689  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2690  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2691  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2692  }
2693  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2694  IntrinsicType xmm1, xmm2;
2695  for( size_t i=0UL; i<M; ++i ) {
2696  const IntrinsicType x1( set( x[i] ) );
2697  xmm1 = xmm1 + x1 * A.get(i,j );
2698  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2699  }
2700  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2701  store( &y[j+IT::size], load( &y[j+IT::size] ) - xmm2*factor );
2702  }
2703  if( j < N ) {
2704  IntrinsicType xmm1;
2705  for( size_t i=0UL; i<M; ++i ) {
2706  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2707  }
2708  store( &y[j], load( &y[j] ) - xmm1*factor );
2709  }
2710  }
2711  //**********************************************************************************************
2712 
2713  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2727  template< typename VT1 // Type of the left-hand side target vector
2728  , typename VT2 // Type of the left-hand side vector operand
2729  , typename MT1 // Type of the right-hand side matrix operand
2730  , typename ST2 > // Type of the scalar value
2731  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2732  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2733  {
2734  selectDefaultSubAssignKernel( y, x, A, scalar );
2735  }
2736  //**********************************************************************************************
2737 
2738  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2739 #if BLAZE_BLAS_MODE
2740 
2753  template< typename VT1 // Type of the left-hand side target vector
2754  , typename VT2 // Type of the left-hand side vector operand
2755  , typename MT1 // Type of the right-hand side matrix operand
2756  , typename ST2 > // Type of the scalar value
2757  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2758  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2759  {
2760  using boost::numeric_cast;
2761 
2762  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2763  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2764  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2765 
2766  const int M ( numeric_cast<int>( A.rows() ) );
2767  const int N ( numeric_cast<int>( A.columns() ) );
2768  const int lda( numeric_cast<int>( A.spacing() ) );
2769 
2770  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2771  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2772  }
2773 #endif
2774  //**********************************************************************************************
2775 
2776  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2777 #if BLAZE_BLAS_MODE
2778 
2791  template< typename VT1 // Type of the left-hand side target vector
2792  , typename VT2 // Type of the left-hand side vector operand
2793  , typename MT1 // Type of the right-hand side matrix operand
2794  , typename ST2 > // Type of the scalar value
2795  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2796  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2797  {
2798  using boost::numeric_cast;
2799 
2800  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2801  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2802  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2803 
2804  const int M ( numeric_cast<int>( A.rows() ) );
2805  const int N ( numeric_cast<int>( A.columns() ) );
2806  const int lda( numeric_cast<int>( A.spacing() ) );
2807 
2808  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2809  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2810  }
2811 #endif
2812  //**********************************************************************************************
2813 
2814  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2815 #if BLAZE_BLAS_MODE
2816 
2829  template< typename VT1 // Type of the left-hand side target vector
2830  , typename VT2 // Type of the left-hand side vector operand
2831  , typename MT1 // Type of the right-hand side matrix operand
2832  , typename ST2 > // Type of the scalar value
2833  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2834  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2835  {
2836  using boost::numeric_cast;
2837 
2838  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2839  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2840  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2842  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2843  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2844  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2845 
2846  const int M ( numeric_cast<int>( A.rows() ) );
2847  const int N ( numeric_cast<int>( A.columns() ) );
2848  const int lda( numeric_cast<int>( A.spacing() ) );
2849  const complex<float> alpha( -scalar );
2850  const complex<float> beta ( 1.0F, 0.0F );
2851 
2852  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2853  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2854  }
2855 #endif
2856  //**********************************************************************************************
2857 
2858  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2859 #if BLAZE_BLAS_MODE
2860 
2873  template< typename VT1 // Type of the left-hand side target vector
2874  , typename VT2 // Type of the left-hand side vector operand
2875  , typename MT1 // Type of the right-hand side matrix operand
2876  , typename ST2 > // Type of the scalar value
2877  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2878  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2879  {
2880  using boost::numeric_cast;
2881 
2882  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2883  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2884  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2886  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2887  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2888  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2889 
2890  const int M ( numeric_cast<int>( A.rows() ) );
2891  const int N ( numeric_cast<int>( A.columns() ) );
2892  const int lda( numeric_cast<int>( A.spacing() ) );
2893  const complex<double> alpha( -scalar );
2894  const complex<double> beta ( 1.0, 0.0 );
2895 
2896  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2897  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2898  }
2899 #endif
2900  //**********************************************************************************************
2901 
2902  //**Subtraction assignment to sparse vectors****************************************************
2903  // No special implementation for the subtraction assignment to sparse vectors.
2904  //**********************************************************************************************
2905 
2906  //**Multiplication assignment to dense vectors**************************************************
2918  template< typename VT1 > // Type of the target dense vector
2919  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2920  {
2922 
2925  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2926 
2927  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2928 
2929  const ResultType tmp( rhs );
2930  multAssign( ~lhs, tmp );
2931  }
2932  //**********************************************************************************************
2933 
2934  //**Multiplication assignment to sparse vectors*******************************************************
2935  // No special implementation for the multiplication assignment to sparse vectors.
2936  //**********************************************************************************************
2937 
2938  //**Compile time checks*************************************************************************
2946  //**********************************************************************************************
2947 };
2949 //*************************************************************************************************
2950 
2951 
2952 
2953 
2954 //=================================================================================================
2955 //
2956 // GLOBAL BINARY ARITHMETIC OPERATORS
2957 //
2958 //=================================================================================================
2959 
2960 //*************************************************************************************************
2991 template< typename T1 // Type of the left-hand side dense vector
2992  , typename T2 > // Type of the right-hand side dense matrix
2993 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
2995 {
2997 
2998  if( (~vec).size() != (~mat).rows() )
2999  throw std::invalid_argument( "Vector and matrix sizes do not match" );
3000 
3001  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
3002 }
3003 //*************************************************************************************************
3004 
3005 
3006 
3007 
3008 //=================================================================================================
3009 //
3010 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3011 //
3012 //=================================================================================================
3013 
3014 //*************************************************************************************************
3027 template< typename T1 // Type of the left-hand side dense vector
3028  , typename T2 // Type of the right-hand side dense matrix
3029  , bool SO > // Storage order of the right-hand side dense matrix
3030 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3032 {
3034 
3035  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
3036 }
3037 //*************************************************************************************************
3038 
3039 } // namespace blaze
3040 
3041 #endif