All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
41 #include <blaze/math/Intrinsics.h>
42 #include <blaze/math/shims/Reset.h>
50 #include <blaze/system/BLAS.h>
52 #include <blaze/util/Assert.h>
53 #include <blaze/util/Complex.h>
59 #include <blaze/util/DisableIf.h>
60 #include <blaze/util/EnableIf.h>
62 #include <blaze/util/SelectType.h>
63 #include <blaze/util/Types.h>
69 
70 
71 namespace blaze {
72 
73 //=================================================================================================
74 //
75 // CLASS TDVECDMATMULTEXPR
76 //
77 //=================================================================================================
78 
79 //*************************************************************************************************
86 template< typename VT // Type of the left-hand side dense vector
87  , typename MT > // Type of the right-hand side dense matrix
88 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
89  , private TVecMatMultExpr
90  , private Computation
91 {
92  private:
93  //**Type definitions****************************************************************************
// Private shorthand aliases for the nested types of the two operand types VT and MT.
94  typedef typename VT::ResultType VRT;  // VT::ResultType of the vector operand
95  typedef typename MT::ResultType MRT;  // MT::ResultType of the matrix operand
96  typedef typename VRT::ElementType VET;  // Element type of the vector result type
97  typedef typename MRT::ElementType MET;  // Element type of the matrix result type
98  typedef typename VT::CompositeType VCT;  // VT::CompositeType of the vector operand
99  typedef typename MT::CompositeType MCT;  // MT::CompositeType of the matrix operand
100  //**********************************************************************************************
101 
102  //**********************************************************************************************
104  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
106  //**********************************************************************************************
107 
108  //**********************************************************************************************
110 
111 
114  template< typename T1, typename T2, typename T3 >
115  struct UseSinglePrecisionKernel {
116  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
120  };
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
126 
127 
130  template< typename T1, typename T2, typename T3 >
131  struct UseDoublePrecisionKernel {
132  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
133  IsDouble<typename T1::ElementType>::value &&
134  IsDouble<typename T2::ElementType>::value &&
135  IsDouble<typename T3::ElementType>::value };
136  };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
143 
146  template< typename T1, typename T2, typename T3 >
147  struct UseSinglePrecisionComplexKernel {
148  typedef complex<float> Type;
149  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
150  IsSame<typename T1::ElementType,Type>::value &&
151  IsSame<typename T2::ElementType,Type>::value &&
152  IsSame<typename T3::ElementType,Type>::value };
153  };
155  //**********************************************************************************************
156 
157  //**********************************************************************************************
159 
160 
163  template< typename T1, typename T2, typename T3 >
164  struct UseDoublePrecisionComplexKernel {
165  typedef complex<double> Type;
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167  IsSame<typename T1::ElementType,Type>::value &&
168  IsSame<typename T2::ElementType,Type>::value &&
169  IsSame<typename T3::ElementType,Type>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
177 
179  template< typename T1, typename T2, typename T3 >
180  struct UseDefaultKernel {
181  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
182  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
183  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
184  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
185  };
187  //**********************************************************************************************
188 
189  //**********************************************************************************************
191 
192 
195  template< typename T1, typename T2, typename T3 >
196  struct UseVectorizedDefaultKernel {
197  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
198  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
199  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
200  IntrinsicTrait<typename T1::ElementType>::addition &&
201  IntrinsicTrait<typename T1::ElementType>::multiplication };
202  };
204  //**********************************************************************************************
205 
206  public:
207  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ResultType, IntrinsicType and RT are used in this class but their typedefs
// are not visible in this listing (the listing numbers skip 209, 212 and 225-226) - confirm
// against the original header.
210  typedef typename ResultType::TransposeType TransposeType;  // Nested TransposeType of ResultType
211  typedef typename ResultType::ElementType ElementType;  // Nested ElementType of ResultType
213  typedef const ElementType ReturnType;  // Type returned by operator[] (by value, const)
214  typedef const ResultType CompositeType;  // const ResultType
215 
// Type used to store the vector operand; which of 'const VT' / 'const VT&' is picked depends
// on IsExpression<VT>::value (presumably the first alternative when the value is true).
217  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
218 
// Same selection for the matrix operand, based on IsExpression<MT>::value.
220  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
221 
// Type of the local 'x' created from the vector operand in assign()/addAssign(); selected
// between 'const VRT' and 'VCT' based on IsComputation<VT>::value.
223  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
224 
227  //**********************************************************************************************
228 
229  //**Compilation flags***************************************************************************
// Compilation flag: the expression itself reports vectorizable == 0.
231  enum { vectorizable = 0 };
232  //**********************************************************************************************
233 
234  //**Constructor*********************************************************************************
240  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
241  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
242  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
243  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
244  {
245  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
246  }
247  //**********************************************************************************************
248 
249  //**Subscript operator**************************************************************************
255  inline ReturnType operator[]( size_t index ) const {
256  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
257 
258  ElementType res;
259 
260  if( mat_.rows() != 0UL ) {
261  res = vec_[0UL] * mat_(0UL,index);
262  for( size_t j=1UL; j<end_; j+=2UL ) {
263  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
264  }
265  if( end_ < mat_.rows() ) {
266  res += vec_[end_] * mat_(end_,index);
267  }
268  }
269  else {
270  reset( res );
271  }
272 
273  return res;
274  }
275  //**********************************************************************************************
276 
277  //**Size function*******************************************************************************
282  inline size_t size() const {
283  return mat_.columns();
284  }
285  //**********************************************************************************************
286 
287  //**Left operand access*************************************************************************
292  inline LeftOperand leftOperand() const {
293  return vec_;
294  }
295  //**********************************************************************************************
296 
297  //**Right operand access************************************************************************
302  inline RightOperand rightOperand() const {
303  return mat_;
304  }
305  //**********************************************************************************************
306 
307  //**********************************************************************************************
313  template< typename T >
314  inline bool canAlias( const T* alias ) const {
315  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
316  }
317  //**********************************************************************************************
318 
319  //**********************************************************************************************
325  template< typename T >
326  inline bool isAliased( const T* alias ) const {
327  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
328  }
329  //**********************************************************************************************
330 
331  private:
332  //**Member variables****************************************************************************
// End of the unrolled calculation loop in the subscript operator; set once in the constructor.
// NOTE(review): the declarations of vec_ and mat_ are not visible in this listing (listing
// numbers 333-334 are skipped) - confirm against the original header.
335  const size_t end_;
336  //**********************************************************************************************
337 
338  //**Assignment to dense vectors*****************************************************************
351  template< typename VT1 > // Type of the target dense vector
352  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
353  {
355 
356  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
357 
358  if( rhs.mat_.rows() == 0UL ) {
359  reset( ~lhs );
360  return;
361  }
362  else if( rhs.mat_.columns() == 0UL ) {
363  return;
364  }
365 
366  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
367  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
368 
369  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
370  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
371  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
372  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
373 
374  if( ( IsComputation<MT>::value && !evaluate ) ||
375  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
376  TDVecDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
377  else
378  TDVecDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
379  }
381  //**********************************************************************************************
382 
383  //**Default assignment to dense vectors*********************************************************
397  template< typename VT1 // Type of the left-hand side target vector
398  , typename VT2 // Type of the left-hand side vector operand
399  , typename MT1 > // Type of the right-hand side matrix operand
400  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
401  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
402  {
403  const size_t M( A.rows() );
404  const size_t N( A.columns() );
405 
406  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
407  const size_t jend( N & size_t(-2) );
408 
409  for( size_t j=0UL; j<N; ++j ) {
410  y[j] = x[0UL] * A(0UL,j);
411  }
412  for( size_t i=1UL; i<M; ++i ) {
413  for( size_t j=0UL; j<jend; j+=2UL ) {
414  y[j ] += x[i] * A(i,j );
415  y[j+1UL] += x[i] * A(i,j+1UL);
416  }
417  if( jend < N ) {
418  y[jend] += x[i] * A(i,jend);
419  }
420  }
421  }
423  //**********************************************************************************************
424 
425  //**Vectorized default assignment to dense vectors**********************************************
439  template< typename VT1 // Type of the left-hand side target vector
440  , typename VT2 // Type of the left-hand side vector operand
441  , typename MT1 > // Type of the right-hand side matrix operand
442  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
443  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
444  {
445  typedef IntrinsicTrait<ElementType> IT;
446 
447  const size_t M( A.rows() );
448  const size_t N( A.columns() );
449 
450  size_t j( 0UL );
451 
452  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
453  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
454  for( size_t i=0UL; i<M; ++i ) {
455  const IntrinsicType x1( set( x[i] ) );
456  xmm1 = xmm1 + x1 * A.get(i,j );
457  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
458  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
459  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
460  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
461  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
462  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
463  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
464  }
465  store( &y[j ], xmm1 );
466  store( &y[j+IT::size ], xmm2 );
467  store( &y[j+IT::size*2UL], xmm3 );
468  store( &y[j+IT::size*3UL], xmm4 );
469  store( &y[j+IT::size*4UL], xmm5 );
470  store( &y[j+IT::size*5UL], xmm6 );
471  store( &y[j+IT::size*6UL], xmm7 );
472  store( &y[j+IT::size*7UL], xmm8 );
473  }
474  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
475  IntrinsicType xmm1, xmm2, xmm3, xmm4;
476  for( size_t i=0UL; i<M; ++i ) {
477  const IntrinsicType x1( set( x[i] ) );
478  xmm1 = xmm1 + x1 * A.get(i,j );
479  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
480  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
481  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
482  }
483  store( &y[j ], xmm1 );
484  store( &y[j+IT::size ], xmm2 );
485  store( &y[j+IT::size*2UL], xmm3 );
486  store( &y[j+IT::size*3UL], xmm4 );
487  }
488  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
489  IntrinsicType xmm1, xmm2, xmm3;
490  for( size_t i=0UL; i<M; ++i ) {
491  const IntrinsicType x1( set( x[i] ) );
492  xmm1 = xmm1 + x1 * A.get(i,j );
493  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
494  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
495  }
496  store( &y[j ], xmm1 );
497  store( &y[j+IT::size ], xmm2 );
498  store( &y[j+IT::size*2UL], xmm3 );
499  }
500  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
501  IntrinsicType xmm1, xmm2;
502  for( size_t i=0UL; i<M; ++i ) {
503  const IntrinsicType x1( set( x[i] ) );
504  xmm1 = xmm1 + x1 * A.get(i,j );
505  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
506  }
507  store( &y[j ], xmm1 );
508  store( &y[j+IT::size], xmm2 );
509  }
510  if( j < N ) {
511  IntrinsicType xmm1;
512  for( size_t i=0UL; i<M; ++i ) {
513  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
514  }
515  store( &y[j], xmm1 );
516  }
517  }
519  //**********************************************************************************************
520 
521  //**BLAS-based assignment to dense vectors (default)********************************************
535  template< typename VT1 // Type of the left-hand side target vector
536  , typename VT2 // Type of the left-hand side vector operand
537  , typename MT1 > // Type of the right-hand side matrix operand
538  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
539  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
540  {
541  selectDefaultAssignKernel( y, x, A );
542  }
544  //**********************************************************************************************
545 
546  //**BLAS-based assignment to dense vectors (single precision)***********************************
547 #if BLAZE_BLAS_MODE
548 
561  template< typename VT1 // Type of the left-hand side target vector
562  , typename VT2 // Type of the left-hand side vector operand
563  , typename MT1 > // Type of the right-hand side matrix operand
564  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
565  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
566  {
567  using boost::numeric_cast;
568 
569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
572 
573  const int M ( numeric_cast<int>( A.rows() ) );
574  const int N ( numeric_cast<int>( A.columns() ) );
575  const int lda( numeric_cast<int>( A.spacing() ) );
576 
577  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
578  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
579  }
581 #endif
582  //**********************************************************************************************
583 
584  //**BLAS-based assignment to dense vectors (double precision)***********************************
585 #if BLAZE_BLAS_MODE
586 
599  template< typename VT1 // Type of the left-hand side target vector
600  , typename VT2 // Type of the left-hand side vector operand
601  , typename MT1 > // Type of the right-hand side matrix operand
602  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
603  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
604  {
605  using boost::numeric_cast;
606 
607  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
608  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
609  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
610 
611  const int M ( numeric_cast<int>( A.rows() ) );
612  const int N ( numeric_cast<int>( A.columns() ) );
613  const int lda( numeric_cast<int>( A.spacing() ) );
614 
615  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
616  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
617  }
619 #endif
620  //**********************************************************************************************
621 
622  //**BLAS-based assignment to dense vectors (single precision complex)***************************
623 #if BLAZE_BLAS_MODE
624 
637  template< typename VT1 // Type of the left-hand side target vector
638  , typename VT2 // Type of the left-hand side vector operand
639  , typename MT1 > // Type of the right-hand side matrix operand
640  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
641  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
642  {
643  using boost::numeric_cast;
644 
645  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
646  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
647  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
648  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
649  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
650  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
651 
652  const int M ( numeric_cast<int>( A.rows() ) );
653  const int N ( numeric_cast<int>( A.columns() ) );
654  const int lda( numeric_cast<int>( A.spacing() ) );
655  const complex<float> alpha( 1.0F, 0.0F );
656  const complex<float> beta ( 0.0F, 0.0F );
657 
658  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
659  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
660  }
662 #endif
663  //**********************************************************************************************
664 
665  //**BLAS-based assignment to dense vectors (double precision complex)***************************
666 #if BLAZE_BLAS_MODE
667 
680  template< typename VT1 // Type of the left-hand side target vector
681  , typename VT2 // Type of the left-hand side vector operand
682  , typename MT1 > // Type of the right-hand side matrix operand
683  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
684  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
685  {
686  using boost::numeric_cast;
687 
688  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
689  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
690  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
691  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
692  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
693  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
694 
695  const int M ( numeric_cast<int>( A.rows() ) );
696  const int N ( numeric_cast<int>( A.columns() ) );
697  const int lda( numeric_cast<int>( A.spacing() ) );
698  const complex<double> alpha( 1.0, 0.0 );
699  const complex<double> beta ( 0.0, 0.0 );
700 
701  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
702  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
703  }
705 #endif
706  //**********************************************************************************************
707 
708  //**Assignment to sparse vectors****************************************************************
721  template< typename VT1 > // Type of the target sparse vector
722  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
723  {
725 
728  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
729 
730  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
731 
732  const ResultType tmp( rhs );
733  assign( ~lhs, tmp );
734  }
736  //**********************************************************************************************
737 
738  //**Addition assignment to dense vectors********************************************************
751  template< typename VT1 > // Type of the target dense vector
752  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
753  {
755 
756  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
757 
758  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
759  return;
760  }
761 
762  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
763  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
764 
765  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
766  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
767  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
768  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
769 
770  if( ( IsComputation<MT>::value && !evaluate ) ||
771  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
772  TDVecDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
773  else
774  TDVecDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
775  }
777  //**********************************************************************************************
778 
779  //**Default addition assignment to dense vectors************************************************
793  template< typename VT1 // Type of the left-hand side target vector
794  , typename VT2 // Type of the left-hand side vector operand
795  , typename MT1 > // Type of the right-hand side matrix operand
796  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
797  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
798  {
799  const size_t M( A.rows() );
800  const size_t N( A.columns() );
801 
802  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
803  const size_t jend( N & size_t(-2) );
804 
805  for( size_t i=0UL; i<M; ++i ) {
806  for( size_t j=0UL; j<jend; j+=2UL ) {
807  y[j ] += x[i] * A(i,j );
808  y[j+1UL] += x[i] * A(i,j+1UL);
809  }
810  if( jend < N ) {
811  y[jend] += x[i] * A(i,jend);
812  }
813  }
814  }
816  //**********************************************************************************************
817 
818  //**Vectorized default addition assignment to dense vectors*************************************
832  template< typename VT1 // Type of the left-hand side target vector
833  , typename VT2 // Type of the left-hand side vector operand
834  , typename MT1 > // Type of the right-hand side matrix operand
835  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
836  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
837  {
838  typedef IntrinsicTrait<ElementType> IT;
839 
840  const size_t M( A.rows() );
841  const size_t N( A.columns() );
842 
843  size_t j( 0UL );
844 
845  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
846  IntrinsicType xmm1( load( &y[j ] ) );
847  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
848  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
849  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
850  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
851  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
852  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
853  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
854  for( size_t i=0UL; i<M; ++i ) {
855  const IntrinsicType x1( set( x[i] ) );
856  xmm1 = xmm1 + x1 * A.get(i,j );
857  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
858  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
859  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
860  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
861  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
862  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
863  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
864  }
865  store( &y[j ], xmm1 );
866  store( &y[j+IT::size ], xmm2 );
867  store( &y[j+IT::size*2UL], xmm3 );
868  store( &y[j+IT::size*3UL], xmm4 );
869  store( &y[j+IT::size*4UL], xmm5 );
870  store( &y[j+IT::size*5UL], xmm6 );
871  store( &y[j+IT::size*6UL], xmm7 );
872  store( &y[j+IT::size*7UL], xmm8 );
873  }
874  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
875  IntrinsicType xmm1( load( &y[j ] ) );
876  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
877  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
878  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
879  for( size_t i=0UL; i<M; ++i ) {
880  const IntrinsicType x1( set( x[i] ) );
881  xmm1 = xmm1 + x1 * A.get(i,j );
882  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
883  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
884  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
885  }
886  store( &y[j ], xmm1 );
887  store( &y[j+IT::size ], xmm2 );
888  store( &y[j+IT::size*2UL], xmm3 );
889  store( &y[j+IT::size*3UL], xmm4 );
890  }
891  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
892  IntrinsicType xmm1( load( &y[j ] ) );
893  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
894  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
895  for( size_t i=0UL; i<M; ++i ) {
896  const IntrinsicType x1( set( x[i] ) );
897  xmm1 = xmm1 + x1 * A.get(i,j );
898  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
899  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
900  }
901  store( &y[j ], xmm1 );
902  store( &y[j+IT::size ], xmm2 );
903  store( &y[j+IT::size*2UL], xmm3 );
904  }
905  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
906  IntrinsicType xmm1( load( &y[j ] ) );
907  IntrinsicType xmm2( load( &y[j+IT::size] ) );
908  for( size_t i=0UL; i<M; ++i ) {
909  const IntrinsicType x1( set( x[i] ) );
910  xmm1 = xmm1 + x1 * A.get(i,j );
911  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
912  }
913  store( &y[j ], xmm1 );
914  store( &y[j+IT::size], xmm2 );
915  }
916  if( j < N ) {
917  IntrinsicType xmm1( load( &y[j] ) );
918  for( size_t i=0UL; i<M; ++i ) {
919  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
920  }
921  store( &y[j], xmm1 );
922  }
923  }
925  //**********************************************************************************************
926 
927  //**BLAS-based addition assignment to dense vectors (default)***********************************
941  template< typename VT1 // Type of the left-hand side target vector
942  , typename VT2 // Type of the left-hand side vector operand
943  , typename MT1 > // Type of the right-hand side matrix operand
944  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
945  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
946  {
947  selectDefaultAddAssignKernel( y, x, A );
948  }
950  //**********************************************************************************************
951 
952  //**BLAS-based addition assignment to dense vectors (single precision)**************************
953 #if BLAZE_BLAS_MODE
954 
967  template< typename VT1 // Type of the left-hand side target vector
968  , typename VT2 // Type of the left-hand side vector operand
969  , typename MT1 > // Type of the right-hand side matrix operand
970  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
971  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
972  {
973  using boost::numeric_cast;
974 
975  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
976  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
977  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
978 
979  const int M ( numeric_cast<int>( A.rows() ) );
980  const int N ( numeric_cast<int>( A.columns() ) );
981  const int lda( numeric_cast<int>( A.spacing() ) );
982 
983  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
984  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
985  }
987 #endif
988  //**********************************************************************************************
989 
990  //**BLAS-based addition assignment to dense vectors (double precision)**************************
991 #if BLAZE_BLAS_MODE
992 
1005  template< typename VT1 // Type of the left-hand side target vector
1006  , typename VT2 // Type of the left-hand side vector operand
1007  , typename MT1 > // Type of the right-hand side matrix operand
1008  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1009  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1010  {
1011  using boost::numeric_cast;
1012 
1013  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1014  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1015  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1016 
1017  const int M ( numeric_cast<int>( A.rows() ) );
1018  const int N ( numeric_cast<int>( A.columns() ) );
1019  const int lda( numeric_cast<int>( A.spacing() ) );
1020 
1021  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1022  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1023  }
1025 #endif
1026  //**********************************************************************************************
1027 
1028  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1029 #if BLAZE_BLAS_MODE
1030 
1043  template< typename VT1 // Type of the left-hand side target vector
1044  , typename VT2 // Type of the left-hand side vector operand
1045  , typename MT1 > // Type of the right-hand side matrix operand
1046  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1047  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1048  {
1049  using boost::numeric_cast;
1050 
1051  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1052  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1053  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1054  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1055  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1056  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1057 
1058  const int M ( numeric_cast<int>( A.rows() ) );
1059  const int N ( numeric_cast<int>( A.columns() ) );
1060  const int lda( numeric_cast<int>( A.spacing() ) );
1061  const complex<float> alpha( 1.0F, 0.0F );
1062  const complex<float> beta ( 1.0F, 0.0F );
1063 
1064  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1065  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1066  }
1068 #endif
1069  //**********************************************************************************************
1070 
1071  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1072 #if BLAZE_BLAS_MODE
1073 
1086  template< typename VT1 // Type of the left-hand side target vector
1087  , typename VT2 // Type of the left-hand side vector operand
1088  , typename MT1 > // Type of the right-hand side matrix operand
1089  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1090  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1091  {
1092  using boost::numeric_cast;
1093 
1094  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1095  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1096  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1097  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1098  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1099  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1100 
1101  const int M ( numeric_cast<int>( A.rows() ) );
1102  const int N ( numeric_cast<int>( A.columns() ) );
1103  const int lda( numeric_cast<int>( A.spacing() ) );
1104  const complex<double> alpha( 1.0, 0.0 );
1105  const complex<double> beta ( 1.0, 0.0 );
1106 
1107  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1108  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1109  }
1111 #endif
1112  //**********************************************************************************************
1113 
1114  //**Addition assignment to sparse vectors*******************************************************
1115  // No special implementation for the addition assignment to sparse vectors.
1116  //**********************************************************************************************
1117 
1118  //**Subtraction assignment to dense vectors*****************************************************
1131  template< typename VT1 > // Type of the target dense vector
1132  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1133  {
1135 
1136  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1137 
1138  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1139  return;
1140  }
1141 
1142  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1143  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1144 
1145  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1146  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1147  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1148  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1149 
1150  if( ( IsComputation<MT>::value && !evaluate ) ||
1151  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1152  TDVecDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1153  else
1154  TDVecDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1155  }
1157  //**********************************************************************************************
1158 
1159  //**Default subtraction assignment to dense vectors*********************************************
1173  template< typename VT1 // Type of the left-hand side target vector
1174  , typename VT2 // Type of the left-hand side vector operand
1175  , typename MT1 > // Type of the right-hand side matrix operand
1176  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1177  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1178  {
1179  const size_t M( A.rows() );
1180  const size_t N( A.columns() );
1181 
1182  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1183  const size_t jend( N & size_t(-2) );
1184 
1185  for( size_t i=0UL; i<M; ++i ) {
1186  for( size_t j=0UL; j<jend; j+=2UL ) {
1187  y[j ] -= x[i] * A(i,j );
1188  y[j+1UL] -= x[i] * A(i,j+1UL);
1189  }
1190  if( jend < N ) {
1191  y[jend] -= x[i] * A(i,jend);
1192  }
1193  }
1194  }
1196  //**********************************************************************************************
1197 
1198  //**Vectorized default subtraction assignment to dense vectors**********************************
1212  template< typename VT1 // Type of the left-hand side target vector
1213  , typename VT2 // Type of the left-hand side vector operand
1214  , typename MT1 > // Type of the right-hand side matrix operand
1215  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1216  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1217  {
1218  typedef IntrinsicTrait<ElementType> IT;
1219 
1220  const size_t M( A.rows() );
1221  const size_t N( A.columns() );
1222 
1223  size_t j( 0UL );
1224 
1225  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1226  IntrinsicType xmm1( load( &y[j ] ) );
1227  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1228  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1229  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1230  IntrinsicType xmm5( load( &y[j+IT::size*4UL] ) );
1231  IntrinsicType xmm6( load( &y[j+IT::size*5UL] ) );
1232  IntrinsicType xmm7( load( &y[j+IT::size*6UL] ) );
1233  IntrinsicType xmm8( load( &y[j+IT::size*7UL] ) );
1234  for( size_t i=0UL; i<M; ++i ) {
1235  const IntrinsicType x1( set( x[i] ) );
1236  xmm1 = xmm1 - x1 * A.get(i,j );
1237  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1238  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1239  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1240  xmm5 = xmm5 - x1 * A.get(i,j+IT::size*4UL);
1241  xmm6 = xmm6 - x1 * A.get(i,j+IT::size*5UL);
1242  xmm7 = xmm7 - x1 * A.get(i,j+IT::size*6UL);
1243  xmm8 = xmm8 - x1 * A.get(i,j+IT::size*7UL);
1244  }
1245  store( &y[j ], xmm1 );
1246  store( &y[j+IT::size ], xmm2 );
1247  store( &y[j+IT::size*2UL], xmm3 );
1248  store( &y[j+IT::size*3UL], xmm4 );
1249  store( &y[j+IT::size*4UL], xmm5 );
1250  store( &y[j+IT::size*5UL], xmm6 );
1251  store( &y[j+IT::size*6UL], xmm7 );
1252  store( &y[j+IT::size*7UL], xmm8 );
1253  }
1254  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1255  IntrinsicType xmm1( load( &y[j ] ) );
1256  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1257  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1258  IntrinsicType xmm4( load( &y[j+IT::size*3UL] ) );
1259  for( size_t i=0UL; i<M; ++i ) {
1260  const IntrinsicType x1( set( x[i] ) );
1261  xmm1 = xmm1 - x1 * A.get(i,j );
1262  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1263  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1264  xmm4 = xmm4 - x1 * A.get(i,j+IT::size*3UL);
1265  }
1266  store( &y[j ], xmm1 );
1267  store( &y[j+IT::size ], xmm2 );
1268  store( &y[j+IT::size*2UL], xmm3 );
1269  store( &y[j+IT::size*3UL], xmm4 );
1270  }
1271  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1272  IntrinsicType xmm1( load( &y[j ] ) );
1273  IntrinsicType xmm2( load( &y[j+IT::size ] ) );
1274  IntrinsicType xmm3( load( &y[j+IT::size*2UL] ) );
1275  for( size_t i=0UL; i<M; ++i ) {
1276  const IntrinsicType x1( set( x[i] ) );
1277  xmm1 = xmm1 - x1 * A.get(i,j );
1278  xmm2 = xmm2 - x1 * A.get(i,j+IT::size );
1279  xmm3 = xmm3 - x1 * A.get(i,j+IT::size*2UL);
1280  }
1281  store( &y[j ], xmm1 );
1282  store( &y[j+IT::size ], xmm2 );
1283  store( &y[j+IT::size*2UL], xmm3 );
1284  }
1285  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1286  IntrinsicType xmm1( load( &y[j ] ) );
1287  IntrinsicType xmm2( load( &y[j+IT::size] ) );
1288  for( size_t i=0UL; i<M; ++i ) {
1289  const IntrinsicType x1( set( x[i] ) );
1290  xmm1 = xmm1 - x1 * A.get(i,j );
1291  xmm2 = xmm2 - x1 * A.get(i,j+IT::size);
1292  }
1293  store( &y[j ], xmm1 );
1294  store( &y[j+IT::size], xmm2 );
1295  }
1296  if( j < N ) {
1297  IntrinsicType xmm1( load( &y[j] ) );
1298  for( size_t i=0UL; i<M; ++i ) {
1299  xmm1 = xmm1 - set( x[i] ) * A.get(i,j);
1300  }
1301  store( &y[j], xmm1 );
1302  }
1303  }
1305  //**********************************************************************************************
1306 
1307  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1321  template< typename VT1 // Type of the left-hand side target vector
1322  , typename VT2 // Type of the left-hand side vector operand
1323  , typename MT1 > // Type of the right-hand side matrix operand
1324  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1325  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1326  {
1327  selectDefaultSubAssignKernel( y, x, A );
1328  }
1330  //**********************************************************************************************
1331 
1332  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1333 #if BLAZE_BLAS_MODE
1334 
1347  template< typename VT1 // Type of the left-hand side target vector
1348  , typename VT2 // Type of the left-hand side vector operand
1349  , typename MT1 > // Type of the right-hand side matrix operand
1350  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1351  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1352  {
1353  using boost::numeric_cast;
1354 
1355  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
1356  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
1357  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
1358 
1359  const int M ( numeric_cast<int>( A.rows() ) );
1360  const int N ( numeric_cast<int>( A.columns() ) );
1361  const int lda( numeric_cast<int>( A.spacing() ) );
1362 
1363  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1364  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1365  }
1367 #endif
1368  //**********************************************************************************************
1369 
1370  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1371 #if BLAZE_BLAS_MODE
1372 
1385  template< typename VT1 // Type of the left-hand side target vector
1386  , typename VT2 // Type of the left-hand side vector operand
1387  , typename MT1 > // Type of the right-hand side matrix operand
1388  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1389  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1390  {
1391  using boost::numeric_cast;
1392 
1393  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
1394  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
1395  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
1396 
1397  const int M ( numeric_cast<int>( A.rows() ) );
1398  const int N ( numeric_cast<int>( A.columns() ) );
1399  const int lda( numeric_cast<int>( A.spacing() ) );
1400 
1401  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1402  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1403  }
1405 #endif
1406  //**********************************************************************************************
1407 
1408  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1409 #if BLAZE_BLAS_MODE
1410 
1423  template< typename VT1 // Type of the left-hand side target vector
1424  , typename VT2 // Type of the left-hand side vector operand
1425  , typename MT1 > // Type of the right-hand side matrix operand
1426  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1427  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1428  {
1429  using boost::numeric_cast;
1430 
1431  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1432  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1433  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1434  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1435  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1436  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1437 
1438  const int M ( numeric_cast<int>( A.rows() ) );
1439  const int N ( numeric_cast<int>( A.columns() ) );
1440  const int lda( numeric_cast<int>( A.spacing() ) );
1441  const complex<float> alpha( -1.0F, 0.0F );
1442  const complex<float> beta ( 1.0F, 0.0F );
1443 
1444  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1445  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1446  }
1448 #endif
1449  //**********************************************************************************************
1450 
1451  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1452 #if BLAZE_BLAS_MODE
1453 
1466  template< typename VT1 // Type of the left-hand side target vector
1467  , typename VT2 // Type of the left-hand side vector operand
1468  , typename MT1 > // Type of the right-hand side matrix operand
1469  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1470  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1471  {
1472  using boost::numeric_cast;
1473 
1474  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
1475  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
1476  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
1477  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1478  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1479  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1480 
1481  const int M ( numeric_cast<int>( A.rows() ) );
1482  const int N ( numeric_cast<int>( A.columns() ) );
1483  const int lda( numeric_cast<int>( A.spacing() ) );
1484  const complex<double> alpha( -1.0, 0.0 );
1485  const complex<double> beta ( 1.0, 0.0 );
1486 
1487  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1488  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1489  }
1491 #endif
1492  //**********************************************************************************************
1493 
1494  //**Subtraction assignment to sparse vectors****************************************************
1495  // No special implementation for the subtraction assignment to sparse vectors.
1496  //**********************************************************************************************
1497 
1498  //**Multiplication assignment to dense vectors**************************************************
1511  template< typename VT1 > // Type of the target dense vector
1512  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1513  {
1515 
1518  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
1519 
1520  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1521 
1522  const ResultType tmp( rhs );
1523  multAssign( ~lhs, tmp );
1524  }
1526  //**********************************************************************************************
1527 
1528  //**Multiplication assignment to sparse vectors*******************************************************
1529  // No special implementation for the multiplication assignment to sparse vectors.
1530  //**********************************************************************************************
1531 
1532  //**Compile time checks*************************************************************************
1539  //**********************************************************************************************
1540 };
1541 //*************************************************************************************************
1542 
1543 
1544 
1545 
1546 //=================================================================================================
1547 //
1548 // DVECSCALARMULTEXPR SPECIALIZATION
1549 //
1550 //=================================================================================================
1551 
1552 //*************************************************************************************************
1560 template< typename VT // Type of the left-hand side dense vector
1561  , typename MT // Type of the right-hand side dense matrix
1562  , typename ST > // Type of the side scalar value
1563 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1564  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1565  , private VecScalarMultExpr
1566  , private Computation
1567 {
1568  private:
1569  //**Type definitions****************************************************************************
// Private shorthand aliases for the wrapped product expression and its two operands.
1570  typedef TDVecDMatMultExpr<VT,MT> VMM;  // The wrapped vector/matrix multiplication expression.
1571  typedef typename VMM::ResultType RES;  // Result type of the wrapped expression.
1572  typedef typename VT::ResultType VRT;  // Result type of the vector operand.
1573  typedef typename MT::ResultType MRT;  // Result type of the matrix operand.
1574  typedef typename VRT::ElementType VET;  // Element type of the vector operand's result type.
1575  typedef typename MRT::ElementType MET;  // Element type of the matrix operand's result type.
1576  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand.
1577  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand.
1578  //**********************************************************************************************
1579 
1580  //**********************************************************************************************
// Compile-time flag. It is set when the matrix operand is a computation that is not
// vectorizable while vector and matrix share one BLAS-compatible element type. The flag picks
// 'const MRT' instead of 'MCT' for the RT typedef and takes part in the kernel choice in assign().
1582  enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1583  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1584  //**********************************************************************************************
1585 
1586  //**********************************************************************************************
1588 
1591  template< typename T1, typename T2, typename T3, typename T4 >
1592  struct UseSinglePrecisionKernel {
1593  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1594  IsFloat<typename T1::ElementType>::value &&
1595  IsFloat<typename T2::ElementType>::value &&
1596  IsFloat<typename T3::ElementType>::value &&
1597  !IsComplex<T4>::value };
1598  };
1599  //**********************************************************************************************
1600 
1601  //**********************************************************************************************
1603 
1606  template< typename T1, typename T2, typename T3, typename T4 >
1607  struct UseDoublePrecisionKernel {
1608  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1609  IsDouble<typename T1::ElementType>::value &&
1610  IsDouble<typename T2::ElementType>::value &&
1611  IsDouble<typename T3::ElementType>::value &&
1612  !IsComplex<T4>::value };
1613  };
1614  //**********************************************************************************************
1615 
1616  //**********************************************************************************************
1618 
1621  template< typename T1, typename T2, typename T3 >
1622  struct UseSinglePrecisionComplexKernel {
1623  typedef complex<float> Type;
1624  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1625  IsSame<typename T1::ElementType,Type>::value &&
1626  IsSame<typename T2::ElementType,Type>::value &&
1627  IsSame<typename T3::ElementType,Type>::value };
1628  };
1629  //**********************************************************************************************
1630 
1631  //**********************************************************************************************
1633 
1636  template< typename T1, typename T2, typename T3 >
1637  struct UseDoublePrecisionComplexKernel {
1638  typedef complex<double> Type;
1639  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1640  IsSame<typename T1::ElementType,Type>::value &&
1641  IsSame<typename T2::ElementType,Type>::value &&
1642  IsSame<typename T3::ElementType,Type>::value };
1643  };
1644  //**********************************************************************************************
1645 
1646  //**********************************************************************************************
1648 
1650  template< typename T1, typename T2, typename T3, typename T4 >
1651  struct UseDefaultKernel {
1652  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1653  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1654  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1655  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1656  };
1657  //**********************************************************************************************
1658 
1659  //**********************************************************************************************
1661 
1664  template< typename T1, typename T2, typename T3, typename T4 >
1665  struct UseVectorizedDefaultKernel {
1666  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1667  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1668  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1669  IsSame<typename T1::ElementType,T4>::value &&
1670  IntrinsicTrait<typename T1::ElementType>::addition &&
1671  IntrinsicTrait<typename T1::ElementType>::multiplication };
1672  };
1673  //**********************************************************************************************
1674 
1675  public:
1676  //**Type definitions****************************************************************************
// Public type interface of the scaled vector/matrix multiplication expression.
1677  typedef DVecScalarMultExpr<VMM,ST,true> This;  // Type of this expression specialization.
1678  typedef typename MultTrait<RES,ST>::Type ResultType;  // Result type, derived via MultTrait from RES and ST.
1679  typedef typename ResultType::TransposeType TransposeType;  // Transpose type of the result.
1680  typedef typename ResultType::ElementType ElementType;  // Element type of the result.
1681  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  // Intrinsic (packet) type for ElementType.
1682  typedef const ElementType ReturnType;  // Type returned by the subscript operator.
1683  typedef const ResultType CompositeType;  // Composite type: a const ResultType value.
1684 
// Type of the left-hand side operand: the wrapped vector/matrix product expression.
1686  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
1687 
// Type of the right-hand side operand: the scalar.
1689  typedef ST RightOperand;
1690 
// Type the vector operand is bound to inside the kernels: its result type when VT is a
// computation, its composite type otherwise.
1692  typedef typename SelectType< IsComputation<VT>::value, const VRT, VCT >::Type LT;
1693 
// Type the matrix operand is bound to inside the kernels: its result type when the
// 'evaluate' flag is set, its composite type otherwise.
1695  typedef typename SelectType< evaluate, const MRT, MCT >::Type RT;
1696  //**********************************************************************************************
1697 
1698  //**Compilation flags***************************************************************************
// Compilation flag: this expression type declares itself as not vectorizable.
1700  enum { vectorizable = 0 };
1701  //**********************************************************************************************
1702 
1703  //**Constructor*********************************************************************************
1709  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1710  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1711  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1712  {}
1713  //**********************************************************************************************
1714 
1715  //**Subscript operator**************************************************************************
1721  inline ReturnType operator[]( size_t index ) const {
1722  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1723  return vector_[index] * scalar_;
1724  }
1725  //**********************************************************************************************
1726 
1727  //**Size function*******************************************************************************
1732  inline size_t size() const {
1733  return vector_.size();
1734  }
1735  //**********************************************************************************************
1736 
1737  //**Left operand access*************************************************************************
1742  inline LeftOperand leftOperand() const {
1743  return vector_;
1744  }
1745  //**********************************************************************************************
1746 
1747  //**Right operand access************************************************************************
1752  inline RightOperand rightOperand() const {
1753  return scalar_;
1754  }
1755  //**********************************************************************************************
1756 
1757  //**********************************************************************************************
1763  template< typename T >
1764  inline bool canAlias( const T* alias ) const {
1765  return vector_.canAlias( alias );
1766  }
1767  //**********************************************************************************************
1768 
1769  //**********************************************************************************************
1775  template< typename T >
1776  inline bool isAliased( const T* alias ) const {
1777  return vector_.isAliased( alias );
1778  }
1779  //**********************************************************************************************
1780 
1781  private:
1782  //**Member variables****************************************************************************
// Operands of the scaled expression, set once by the constructor.
1783  LeftOperand vector_;  // Left-hand side vector/matrix product expression.
1784  RightOperand scalar_;  // Right-hand side scalar factor.
1785  //**********************************************************************************************
1786 
1787  //**Assignment to dense vectors*****************************************************************
1799  template< typename VT1 > // Type of the target dense vector
1800  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
1801  {
1803 
1804  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1805 
1806  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1807  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1808 
1809  if( right.rows() == 0UL ) {
1810  reset( ~lhs );
1811  return;
1812  }
1813  else if( right.columns() == 0UL ) {
1814  return;
1815  }
1816 
1817  LT x( left ); // Evaluation of the left-hand side dense vector operand
1818  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1819 
1820  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1821  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1822  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1823  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1824 
1825  if( ( IsComputation<MT>::value && !evaluate ) ||
1826  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1827  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1828  else
1829  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1830  }
1831  //**********************************************************************************************
1832 
1833  //**Default assignment to dense vectors*********************************************************
1847  template< typename VT1 // Type of the left-hand side target vector
1848  , typename VT2 // Type of the left-hand side vector operand
1849  , typename MT1 // Type of the right-hand side matrix operand
1850  , typename ST2 > // Type of the scalar value
1851  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1852  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1853  {
1854  const size_t M( A.rows() );
1855  const size_t N( A.columns() );
1856 
1857  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1858  const size_t jend( N & size_t(-2) );
1859 
1860  for( size_t j=0UL; j<N; ++j ) {
1861  y[j] = x[0UL] * A(0UL,j);
1862  }
1863  for( size_t i=1UL; i<M; ++i ) {
1864  for( size_t j=0UL; j<jend; j+=2UL ) {
1865  y[j ] += x[i] * A(i,j );
1866  y[j+1UL] += x[i] * A(i,j+1UL);
1867  }
1868  if( jend < N ) {
1869  y[jend] += x[i] * A(i,jend);
1870  }
1871  }
1872  for( size_t j=0UL; j<N; ++j ) {
1873  y[j] *= scalar;
1874  }
1875  }
1876  //**********************************************************************************************
1877 
1878  //**Vectorized default assignment to dense vectors***********************************************
1892  template< typename VT1 // Type of the left-hand side target vector
1893  , typename VT2 // Type of the left-hand side vector operand
1894  , typename MT1 // Type of the right-hand side matrix operand
1895  , typename ST2 > // Type of the scalar value
1896  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1897  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1898  {
1899  typedef IntrinsicTrait<ElementType> IT;
1900 
1901  const size_t M( A.rows() );
1902  const size_t N( A.columns() );
1903 
1904  const IntrinsicType factor( set( scalar ) );
1905 
1906  size_t j( 0UL );
1907 
1908  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1909  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1910  for( size_t i=0UL; i<M; ++i ) {
1911  const IntrinsicType x1( set( x[i] ) );
1912  xmm1 = xmm1 + x1 * A.get(i,j );
1913  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1914  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1915  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1916  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
1917  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
1918  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
1919  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
1920  }
1921  store( &y[j ], xmm1*factor );
1922  store( &y[j+IT::size ], xmm2*factor );
1923  store( &y[j+IT::size*2UL], xmm3*factor );
1924  store( &y[j+IT::size*3UL], xmm4*factor );
1925  store( &y[j+IT::size*4UL], xmm5*factor );
1926  store( &y[j+IT::size*5UL], xmm6*factor );
1927  store( &y[j+IT::size*6UL], xmm7*factor );
1928  store( &y[j+IT::size*7UL], xmm8*factor );
1929  }
1930  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1931  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1932  for( size_t i=0UL; i<M; ++i ) {
1933  const IntrinsicType x1( set( x[i] ) );
1934  xmm1 = xmm1 + x1 * A.get(i,j );
1935  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1936  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1937  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
1938  }
1939  store( &y[j ], xmm1*factor );
1940  store( &y[j+IT::size ], xmm2*factor );
1941  store( &y[j+IT::size*2UL], xmm3*factor );
1942  store( &y[j+IT::size*3UL], xmm4*factor );
1943  }
1944  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1945  IntrinsicType xmm1, xmm2, xmm3;
1946  for( size_t i=0UL; i<M; ++i ) {
1947  const IntrinsicType x1( set( x[i] ) );
1948  xmm1 = xmm1 + x1 * A.get(i,j );
1949  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
1950  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
1951  }
1952  store( &y[j ], xmm1*factor );
1953  store( &y[j+IT::size ], xmm2*factor );
1954  store( &y[j+IT::size*2UL], xmm3*factor );
1955  }
1956  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1957  IntrinsicType xmm1, xmm2;
1958  for( size_t i=0UL; i<M; ++i ) {
1959  const IntrinsicType x1( set( x[i] ) );
1960  xmm1 = xmm1 + x1 * A.get(i,j );
1961  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
1962  }
1963  store( &y[j ], xmm1*factor );
1964  store( &y[j+IT::size], xmm2*factor );
1965  }
1966  if( j < N ) {
1967  IntrinsicType xmm1;
1968  for( size_t i=0UL; i<M; ++i ) {
1969  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
1970  }
1971  store( &y[j], xmm1*factor );
1972  }
1973  }
1974  //**********************************************************************************************
1975 
1976  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the BLAS-based assignment kernel.
// It is selected through EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> > and does not
// call BLAS at all: it forwards every argument unchanged to selectDefaultAssignKernel().
//   y      - target vector of the assignment
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor of the scaled vector/matrix product
1989  template< typename VT1 // Type of the left-hand side target vector
1990  , typename VT2 // Type of the left-hand side vector operand
1991  , typename MT1 // Type of the right-hand side matrix operand
1992  , typename ST2 > // Type of the scalar value
1993  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1994  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1995  {
1996  selectDefaultAssignKernel( y, x, A, scalar );
1997  }
1998  //**********************************************************************************************
1999 
2000  //**BLAS-based assignment to dense vectors (single precision)***********************************
2001 #if BLAZE_BLAS_MODE
2002 
// BLAS-based assignment kernel for single precision operands.
// Selected through EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_sgemv with CblasRowMajor/CblasTrans, alpha = scalar and beta = 0.0F.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this overwrites y with
// scalar * A^T * x.
//   y      - target vector of the assignment (written through y.data())
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, passed to BLAS as alpha
2015  template< typename VT1 // Type of the left-hand side target vector
2016  , typename VT2 // Type of the left-hand side vector operand
2017  , typename MT1 // Type of the right-hand side matrix operand
2018  , typename ST2 > // Type of the scalar value
2019  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2020  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2021  {
2022  using boost::numeric_cast;
2023 
      // All three element types are constrained to float.
2024  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2025  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2026  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2027 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2028  const int M ( numeric_cast<int>( A.rows() ) );
2029  const int N ( numeric_cast<int>( A.columns() ) );
2030  const int lda( numeric_cast<int>( A.spacing() ) );
2031 
      // Both vectors are accessed with unit stride; beta = 0.0F discards the old y.
2032  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2033  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2034  }
2035 #endif
2036  //**********************************************************************************************
2037 
2038  //**BLAS-based assignment to dense vectors (double precision)***********************************
2039 #if BLAZE_BLAS_MODE
2040 
// BLAS-based assignment kernel for double precision operands.
// Selected through EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_dgemv with CblasRowMajor/CblasTrans, alpha = scalar and beta = 0.0.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this overwrites y with
// scalar * A^T * x.
//   y      - target vector of the assignment (written through y.data())
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, passed to BLAS as alpha
2053  template< typename VT1 // Type of the left-hand side target vector
2054  , typename VT2 // Type of the left-hand side vector operand
2055  , typename MT1 // Type of the right-hand side matrix operand
2056  , typename ST2 > // Type of the scalar value
2057  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2058  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2059  {
2060  using boost::numeric_cast;
2061 
      // All three element types are constrained to double.
2062  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2063  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2064  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2065 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2066  const int M ( numeric_cast<int>( A.rows() ) );
2067  const int N ( numeric_cast<int>( A.columns() ) );
2068  const int lda( numeric_cast<int>( A.spacing() ) );
2069 
      // Both vectors are accessed with unit stride; beta = 0.0 discards the old y.
2070  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2071  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2072  }
2073 #endif
2074  //**********************************************************************************************
2075 
2076  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2077 #if BLAZE_BLAS_MODE
2078 
// BLAS-based assignment kernel for single precision complex operands.
// Selected through EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_cgemv
// with CblasRowMajor/CblasTrans, alpha = scalar and beta = (0,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this overwrites y with scalar * A^T * x.
//   y      - target vector of the assignment (written through y.data())
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, converted to complex<float> and passed as alpha
2091  template< typename VT1 // Type of the left-hand side target vector
2092  , typename VT2 // Type of the left-hand side vector operand
2093  , typename MT1 // Type of the right-hand side matrix operand
2094  , typename ST2 > // Type of the scalar value
2095  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2096  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2097  {
2098  using boost::numeric_cast;
2099 
      // All three element types are constrained to complex types with a float value_type.
2100  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2101  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2102  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2103  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2104  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2105  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2106 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2107  const int M ( numeric_cast<int>( A.rows() ) );
2108  const int N ( numeric_cast<int>( A.columns() ) );
2109  const int lda( numeric_cast<int>( A.spacing() ) );
2110  const complex<float> alpha( scalar );
2111  const complex<float> beta ( 0.0F, 0.0F );
2112 
      // Both vectors are accessed with unit stride; beta = 0 discards the old y.
2113  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2114  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2115  }
2116 #endif
2117  //**********************************************************************************************
2118 
2119  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2120 #if BLAZE_BLAS_MODE
2121 
// BLAS-based assignment kernel for double precision complex operands.
// Selected through EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_zgemv
// with CblasRowMajor/CblasTrans, alpha = scalar and beta = (0,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this overwrites y with scalar * A^T * x.
//   y      - target vector of the assignment (written through y.data())
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, converted to complex<double> and passed as alpha
2134  template< typename VT1 // Type of the left-hand side target vector
2135  , typename VT2 // Type of the left-hand side vector operand
2136  , typename MT1 // Type of the right-hand side matrix operand
2137  , typename ST2 > // Type of the scalar value
2138  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2139  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2140  {
2141  using boost::numeric_cast;
2142 
      // All three element types are constrained to complex types with a double value_type.
2143  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2144  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2145  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2146  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2147  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2148  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2149 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2150  const int M ( numeric_cast<int>( A.rows() ) );
2151  const int N ( numeric_cast<int>( A.columns() ) );
2152  const int lda( numeric_cast<int>( A.spacing() ) );
2153  const complex<double> alpha( scalar );
2154  const complex<double> beta ( 0.0, 0.0 );
2155 
      // Both vectors are accessed with unit stride; beta = 0 discards the old y.
2156  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2157  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2158  }
2159 #endif
2160  //**********************************************************************************************
2161 
2162  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector/matrix product to a sparse vector.
// There is no dedicated sparse kernel: the expression is first evaluated into a temporary
// of type ResultType, and that temporary is then assigned to the target.
//   lhs - target sparse vector; its size must equal rhs.size() (internal assertion)
//   rhs - scaled multiplication expression to be assigned
2174  template< typename VT1 > // Type of the target sparse vector
2175  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2176  {
2178 
2181  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2182 
2183  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2184 
      // Evaluate the whole expression once, then hand the plain result to assign().
2185  const ResultType tmp( rhs );
2186  assign( ~lhs, tmp );
2187  }
2188  //**********************************************************************************************
2189 
2190  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector/matrix product to a dense vector.
// Steps: fetch both operands of the wrapped product (rhs.vector_), return early when the
// matrix operand is empty, evaluate the operands into x and A, and dispatch to either the
// default or the BLAS addition-assignment kernel.
//   lhs - target dense vector; its size must equal rhs.size() (internal assertion)
//   rhs - scaled multiplication expression to be added
2202  template< typename VT1 > // Type of the target dense vector
2203  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2204  {
2206 
2207  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2208 
2209  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2210  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2211 
      // A matrix without rows or columns leaves the target untouched.
2212  if( right.rows() == 0UL || right.columns() == 0UL ) {
2213  return;
2214  }
2215 
2216  LT x( left ); // Evaluation of the left-hand side dense vector operand
2217  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2218 
2219  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2220  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2221  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2222  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2223 
      // The default kernel is used when the matrix operand is an unevaluated computation
      // or when the matrix has fewer elements than TDVECDMATMULT_THRESHOLD; otherwise the
      // BLAS kernel is used.
2224  if( ( IsComputation<MT>::value && !evaluate ) ||
2225  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2226  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2227  else
2228  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2229  }
2230  //**********************************************************************************************
2231 
2232  //**Default addition assignment to dense vectors************************************************
// Non-vectorized default kernel for the addition assignment.
// Selected through DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >, i.e. whenever
// the vectorized overload is not applicable. It rebuilds the expression x * A * scalar and
// passes it to the target's own addAssign().
//   y      - target vector that receives the sum
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2246  template< typename VT1 // Type of the left-hand side target vector
2247  , typename VT2 // Type of the left-hand side vector operand
2248  , typename MT1 // Type of the right-hand side matrix operand
2249  , typename ST2 > // Type of the scalar value
2250  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2251  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2252  {
2253  y.addAssign( x * A * scalar );
2254  }
2255  //**********************************************************************************************
2256 
2257  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment.
// Selected through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >. The columns of A
// are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vectors (IT::size elements
// each). For every block the kernel walks all M rows, accumulates x[i] * A(i, j...) in
// intrinsic registers, multiplies the sums by the broadcast scalar and adds them onto y.
//   y      - target vector, updated in place through load()/store()
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand, read through A.get(i,j)
//   scalar - scaling factor, broadcast once into 'factor'
// NOTE(review): the xmm accumulators are declared without an initializer; this appears to
// rely on IntrinsicType's default constructor producing zeros - confirm.
// NOTE(review): every loop condition admits a last, possibly partial intrinsic vector
// (e.g. j < N for the final block), so full-width loads/stores near N presumably rely on
// padded storage of y and A - confirm.
2271  template< typename VT1 // Type of the left-hand side target vector
2272  , typename VT2 // Type of the left-hand side vector operand
2273  , typename MT1 // Type of the right-hand side matrix operand
2274  , typename ST2 > // Type of the scalar value
2275  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2276  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2277  {
2278  typedef IntrinsicTrait<ElementType> IT;
2279 
2280  const size_t M( A.rows() );
2281  const size_t N( A.columns() );
2282 
      // Broadcast the scalar once; it is applied after the row accumulation.
2283  const IntrinsicType factor( set( scalar ) );
2284 
      // Column index; shared by all blocked loops below so each continues where the
      // previous one stopped.
2285  size_t j( 0UL );
2286 
      // Blocks of eight intrinsic vectors.
2287  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2288  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2289  for( size_t i=0UL; i<M; ++i ) {
2290  const IntrinsicType x1( set( x[i] ) );
2291  xmm1 = xmm1 + x1 * A.get(i,j );
2292  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2293  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2294  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2295  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2296  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2297  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2298  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2299  }
2300  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2301  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2302  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2303  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2304  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) + xmm5*factor );
2305  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) + xmm6*factor );
2306  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) + xmm7*factor );
2307  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) + xmm8*factor );
2308  }
      // Blocks of four intrinsic vectors.
2309  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2310  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2311  for( size_t i=0UL; i<M; ++i ) {
2312  const IntrinsicType x1( set( x[i] ) );
2313  xmm1 = xmm1 + x1 * A.get(i,j );
2314  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2315  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2316  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2317  }
2318  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2319  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2320  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2321  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) + xmm4*factor );
2322  }
      // Blocks of three intrinsic vectors.
2323  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2324  IntrinsicType xmm1, xmm2, xmm3;
2325  for( size_t i=0UL; i<M; ++i ) {
2326  const IntrinsicType x1( set( x[i] ) );
2327  xmm1 = xmm1 + x1 * A.get(i,j );
2328  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2329  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2330  }
2331  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2332  store( &y[j+IT::size ], load( &y[j+IT::size ] ) + xmm2*factor );
2333  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) + xmm3*factor );
2334  }
      // Blocks of two intrinsic vectors.
2335  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2336  IntrinsicType xmm1, xmm2;
2337  for( size_t i=0UL; i<M; ++i ) {
2338  const IntrinsicType x1( set( x[i] ) );
2339  xmm1 = xmm1 + x1 * A.get(i,j );
2340  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2341  }
2342  store( &y[j ], load( &y[j ] ) + xmm1*factor );
2343  store( &y[j+IT::size], load( &y[j+IT::size] ) + xmm2*factor );
2344  }
      // At most one intrinsic vector is left at this point.
2345  if( j < N ) {
2346  IntrinsicType xmm1;
2347  for( size_t i=0UL; i<M; ++i ) {
2348  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2349  }
2350  store( &y[j], load( &y[j] ) + xmm1*factor );
2351  }
2352  }
2353  //**********************************************************************************************
2354 
2355  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS-based addition-assignment kernel.
// It is selected through EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> > and does not call
// BLAS at all: it forwards every argument unchanged to selectDefaultAddAssignKernel().
//   y      - target vector that receives the sum
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor of the scaled vector/matrix product
2369  template< typename VT1 // Type of the left-hand side target vector
2370  , typename VT2 // Type of the left-hand side vector operand
2371  , typename MT1 // Type of the right-hand side matrix operand
2372  , typename ST2 > // Type of the scalar value
2373  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2374  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2375  {
2376  selectDefaultAddAssignKernel( y, x, A, scalar );
2377  }
2378  //**********************************************************************************************
2379 
2380  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2381 #if BLAZE_BLAS_MODE
2382 
// BLAS-based addition-assignment kernel for single precision operands.
// Selected through EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_sgemv with CblasRowMajor/CblasTrans, alpha = scalar and beta = 1.0F.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this adds scalar * A^T * x
// onto the existing contents of y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, passed to BLAS as alpha
2395  template< typename VT1 // Type of the left-hand side target vector
2396  , typename VT2 // Type of the left-hand side vector operand
2397  , typename MT1 // Type of the right-hand side matrix operand
2398  , typename ST2 > // Type of the scalar value
2399  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2400  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2401  {
2402  using boost::numeric_cast;
2403 
      // All three element types are constrained to float.
2404  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2405  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2406  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2407 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2408  const int M ( numeric_cast<int>( A.rows() ) );
2409  const int N ( numeric_cast<int>( A.columns() ) );
2410  const int lda( numeric_cast<int>( A.spacing() ) );
2411 
      // beta = 1.0F keeps the old y, so the product is accumulated onto it.
2412  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2413  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2414  }
2415 #endif
2416  //**********************************************************************************************
2417 
2418  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2419 #if BLAZE_BLAS_MODE
2420 
// BLAS-based addition-assignment kernel for double precision operands.
// Selected through EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_dgemv with CblasRowMajor/CblasTrans, alpha = scalar and beta = 1.0.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this adds scalar * A^T * x
// onto the existing contents of y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, passed to BLAS as alpha
2433  template< typename VT1 // Type of the left-hand side target vector
2434  , typename VT2 // Type of the left-hand side vector operand
2435  , typename MT1 // Type of the right-hand side matrix operand
2436  , typename ST2 > // Type of the scalar value
2437  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2438  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2439  {
2440  using boost::numeric_cast;
2441 
      // All three element types are constrained to double.
2442  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2443  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2444  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2445 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2446  const int M ( numeric_cast<int>( A.rows() ) );
2447  const int N ( numeric_cast<int>( A.columns() ) );
2448  const int lda( numeric_cast<int>( A.spacing() ) );
2449 
      // beta = 1.0 keeps the old y, so the product is accumulated onto it.
2450  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2451  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2452  }
2453 #endif
2454  //**********************************************************************************************
2455 
2456  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2457 #if BLAZE_BLAS_MODE
2458 
// BLAS-based addition-assignment kernel for single precision complex operands.
// Selected through EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_cgemv
// with CblasRowMajor/CblasTrans, alpha = scalar and beta = (1,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this adds scalar * A^T * x onto y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, converted to complex<float> and passed as alpha
2471  template< typename VT1 // Type of the left-hand side target vector
2472  , typename VT2 // Type of the left-hand side vector operand
2473  , typename MT1 // Type of the right-hand side matrix operand
2474  , typename ST2 > // Type of the scalar value
2475  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2476  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2477  {
2478  using boost::numeric_cast;
2479 
      // All three element types are constrained to complex types with a float value_type.
2480  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2481  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2483  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2484  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2485  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2486 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2487  const int M ( numeric_cast<int>( A.rows() ) );
2488  const int N ( numeric_cast<int>( A.columns() ) );
2489  const int lda( numeric_cast<int>( A.spacing() ) );
2490  const complex<float> alpha( scalar );
2491  const complex<float> beta ( 1.0F, 0.0F );
2492 
      // beta = 1 keeps the old y, so the product is accumulated onto it.
2493  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2494  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2495  }
2496 #endif
2497  //**********************************************************************************************
2498 
2499  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2500 #if BLAZE_BLAS_MODE
2501 
// BLAS-based addition-assignment kernel for double precision complex operands.
// Selected through EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_zgemv
// with CblasRowMajor/CblasTrans, alpha = scalar and beta = (1,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this adds scalar * A^T * x onto y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor, converted to complex<double> and passed as alpha
2514  template< typename VT1 // Type of the left-hand side target vector
2515  , typename VT2 // Type of the left-hand side vector operand
2516  , typename MT1 // Type of the right-hand side matrix operand
2517  , typename ST2 > // Type of the scalar value
2518  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2519  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2520  {
2521  using boost::numeric_cast;
2522 
      // All three element types are constrained to complex types with a double value_type.
2523  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2524  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2525  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2526  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2527  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2528  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2529 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2530  const int M ( numeric_cast<int>( A.rows() ) );
2531  const int N ( numeric_cast<int>( A.columns() ) );
2532  const int lda( numeric_cast<int>( A.spacing() ) );
2533  const complex<double> alpha( scalar );
2534  const complex<double> beta ( 1.0, 0.0 );
2535 
      // beta = 1 keeps the old y, so the product is accumulated onto it.
2536  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2537  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2538  }
2539 #endif
2540  //**********************************************************************************************
2541 
2542  //**Addition assignment to sparse vectors*******************************************************
2543  // No special implementation for the addition assignment to sparse vectors.
2544  //**********************************************************************************************
2545 
2546  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled vector/matrix product to a dense vector.
// Steps: fetch both operands of the wrapped product (rhs.vector_), return early when the
// matrix operand is empty, evaluate the operands into x and A, and dispatch to either the
// default or the BLAS subtraction-assignment kernel.
//   lhs - target dense vector; its size must equal rhs.size() (internal assertion)
//   rhs - scaled multiplication expression to be subtracted
2558  template< typename VT1 > // Type of the target dense vector
2559  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2560  {
2562 
2563  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2564 
2565  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2566  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2567 
      // A matrix without rows or columns leaves the target untouched.
2568  if( right.rows() == 0UL || right.columns() == 0UL ) {
2569  return;
2570  }
2571 
2572  LT x( left ); // Evaluation of the left-hand side dense vector operand
2573  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2574 
2575  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2576  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2577  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2578  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2579 
      // The default kernel is used when the matrix operand is an unevaluated computation
      // or when the matrix has fewer elements than TDVECDMATMULT_THRESHOLD; otherwise the
      // BLAS kernel is used.
2580  if( ( IsComputation<MT>::value && !evaluate ) ||
2581  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2582  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2583  else
2584  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2585  }
2586  //**********************************************************************************************
2587 
2588  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel for the subtraction assignment.
// Selected through DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >, i.e. whenever
// the vectorized overload is not applicable. It rebuilds the expression x * A * scalar and
// passes it to the target's own subAssign().
//   y      - target vector that the product is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
2602  template< typename VT1 // Type of the left-hand side target vector
2603  , typename VT2 // Type of the left-hand side vector operand
2604  , typename MT1 // Type of the right-hand side matrix operand
2605  , typename ST2 > // Type of the scalar value
2606  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2607  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2608  {
2609  y.subAssign( x * A * scalar );
2610  }
2611  //**********************************************************************************************
2612 
2613  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment.
// Selected through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >. The columns of A
// are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vectors (IT::size elements
// each). For every block the kernel walks all M rows, accumulates x[i] * A(i, j...) in
// intrinsic registers, multiplies the sums by the broadcast scalar and subtracts them
// from y.
//   y      - target vector, updated in place through load()/store()
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand, read through A.get(i,j)
//   scalar - scaling factor, broadcast once into 'factor'
// NOTE(review): the xmm accumulators are declared without an initializer; this appears to
// rely on IntrinsicType's default constructor producing zeros - confirm.
// NOTE(review): every loop condition admits a last, possibly partial intrinsic vector
// (e.g. j < N for the final block), so full-width loads/stores near N presumably rely on
// padded storage of y and A - confirm.
2627  template< typename VT1 // Type of the left-hand side target vector
2628  , typename VT2 // Type of the left-hand side vector operand
2629  , typename MT1 // Type of the right-hand side matrix operand
2630  , typename ST2 > // Type of the scalar value
2631  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2632  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2633  {
2634  typedef IntrinsicTrait<ElementType> IT;
2635 
2636  const size_t M( A.rows() );
2637  const size_t N( A.columns() );
2638 
      // Broadcast the scalar once; it is applied after the row accumulation.
2639  const IntrinsicType factor( set( scalar ) );
2640 
      // Column index; shared by all blocked loops below so each continues where the
      // previous one stopped.
2641  size_t j( 0UL );
2642 
      // Blocks of eight intrinsic vectors.
2643  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2644  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2645  for( size_t i=0UL; i<M; ++i ) {
2646  const IntrinsicType x1( set( x[i] ) );
2647  xmm1 = xmm1 + x1 * A.get(i,j );
2648  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2649  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2650  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2651  xmm5 = xmm5 + x1 * A.get(i,j+IT::size*4UL);
2652  xmm6 = xmm6 + x1 * A.get(i,j+IT::size*5UL);
2653  xmm7 = xmm7 + x1 * A.get(i,j+IT::size*6UL);
2654  xmm8 = xmm8 + x1 * A.get(i,j+IT::size*7UL);
2655  }
2656  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2657  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2658  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2659  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2660  store( &y[j+IT::size*4UL], load( &y[j+IT::size*4UL] ) - xmm5*factor );
2661  store( &y[j+IT::size*5UL], load( &y[j+IT::size*5UL] ) - xmm6*factor );
2662  store( &y[j+IT::size*6UL], load( &y[j+IT::size*6UL] ) - xmm7*factor );
2663  store( &y[j+IT::size*7UL], load( &y[j+IT::size*7UL] ) - xmm8*factor );
2664  }
      // Blocks of four intrinsic vectors.
2665  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2666  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2667  for( size_t i=0UL; i<M; ++i ) {
2668  const IntrinsicType x1( set( x[i] ) );
2669  xmm1 = xmm1 + x1 * A.get(i,j );
2670  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2671  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2672  xmm4 = xmm4 + x1 * A.get(i,j+IT::size*3UL);
2673  }
2674  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2675  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2676  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2677  store( &y[j+IT::size*3UL], load( &y[j+IT::size*3UL] ) - xmm4*factor );
2678  }
      // Blocks of three intrinsic vectors.
2679  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2680  IntrinsicType xmm1, xmm2, xmm3;
2681  for( size_t i=0UL; i<M; ++i ) {
2682  const IntrinsicType x1( set( x[i] ) );
2683  xmm1 = xmm1 + x1 * A.get(i,j );
2684  xmm2 = xmm2 + x1 * A.get(i,j+IT::size );
2685  xmm3 = xmm3 + x1 * A.get(i,j+IT::size*2UL);
2686  }
2687  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2688  store( &y[j+IT::size ], load( &y[j+IT::size ] ) - xmm2*factor );
2689  store( &y[j+IT::size*2UL], load( &y[j+IT::size*2UL] ) - xmm3*factor );
2690  }
      // Blocks of two intrinsic vectors.
2691  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2692  IntrinsicType xmm1, xmm2;
2693  for( size_t i=0UL; i<M; ++i ) {
2694  const IntrinsicType x1( set( x[i] ) );
2695  xmm1 = xmm1 + x1 * A.get(i,j );
2696  xmm2 = xmm2 + x1 * A.get(i,j+IT::size);
2697  }
2698  store( &y[j ], load( &y[j ] ) - xmm1*factor );
2699  store( &y[j+IT::size], load( &y[j+IT::size] ) - xmm2*factor );
2700  }
      // At most one intrinsic vector is left at this point.
2701  if( j < N ) {
2702  IntrinsicType xmm1;
2703  for( size_t i=0UL; i<M; ++i ) {
2704  xmm1 = xmm1 + set( x[i] ) * A.get(i,j);
2705  }
2706  store( &y[j], load( &y[j] ) - xmm1*factor );
2707  }
2708  }
2709  //**********************************************************************************************
2710 
2711  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback overload of the BLAS-based subtraction-assignment kernel.
// It is selected through EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> > and does not call
// BLAS at all: it forwards every argument unchanged to selectDefaultSubAssignKernel().
//   y      - target vector that the product is subtracted from
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor of the scaled vector/matrix product
2725  template< typename VT1 // Type of the left-hand side target vector
2726  , typename VT2 // Type of the left-hand side vector operand
2727  , typename MT1 // Type of the right-hand side matrix operand
2728  , typename ST2 > // Type of the scalar value
2729  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2730  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2731  {
2732  selectDefaultSubAssignKernel( y, x, A, scalar );
2733  }
2734  //**********************************************************************************************
2735 
2736  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2737 #if BLAZE_BLAS_MODE
2738 
// BLAS-based subtraction-assignment kernel for single precision operands.
// Selected through EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_sgemv with CblasRowMajor/CblasTrans, alpha = -scalar and beta = 1.0F.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this subtracts
// scalar * A^T * x from the existing contents of y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor; its negation is passed to BLAS as alpha
2751  template< typename VT1 // Type of the left-hand side target vector
2752  , typename VT2 // Type of the left-hand side vector operand
2753  , typename MT1 // Type of the right-hand side matrix operand
2754  , typename ST2 > // Type of the scalar value
2755  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2756  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2757  {
2758  using boost::numeric_cast;
2759 
      // All three element types are constrained to float.
2760  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT1::ElementType );
2761  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename VT2::ElementType );
2762  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT1::ElementType );
2763 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2764  const int M ( numeric_cast<int>( A.rows() ) );
2765  const int N ( numeric_cast<int>( A.columns() ) );
2766  const int lda( numeric_cast<int>( A.spacing() ) );
2767 
      // Negated alpha with beta = 1.0F turns the GEMV update into a subtraction.
2768  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2769  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2770  }
2771 #endif
2772  //**********************************************************************************************
2773 
2774  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2775 #if BLAZE_BLAS_MODE
2776 
// BLAS-based subtraction-assignment kernel for double precision operands.
// Selected through EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >. The work is
// delegated to cblas_dgemv with CblasRowMajor/CblasTrans, alpha = -scalar and beta = 1.0.
// Under the BLAS GEMV contract (y := alpha*op(A)*x + beta*y) this subtracts
// scalar * A^T * x from the existing contents of y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor; its negation is passed to BLAS as alpha
2789  template< typename VT1 // Type of the left-hand side target vector
2790  , typename VT2 // Type of the left-hand side vector operand
2791  , typename MT1 // Type of the right-hand side matrix operand
2792  , typename ST2 > // Type of the scalar value
2793  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2794  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2795  {
2796  using boost::numeric_cast;
2797 
      // All three element types are constrained to double.
2798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT1::ElementType );
2799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename VT2::ElementType );
2800  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT1::ElementType );
2801 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // A.spacing() is handed to BLAS as the leading dimension (lda).
2802  const int M ( numeric_cast<int>( A.rows() ) );
2803  const int N ( numeric_cast<int>( A.columns() ) );
2804  const int lda( numeric_cast<int>( A.spacing() ) );
2805 
      // Negated alpha with beta = 1.0 turns the GEMV update into a subtraction.
2806  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2807  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2808  }
2809 #endif
2810  //**********************************************************************************************
2811 
2812  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2813 #if BLAZE_BLAS_MODE
2814 
// BLAS-based subtraction-assignment kernel for single precision complex operands.
// Selected through EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_cgemv
// with CblasRowMajor/CblasTrans, alpha = -scalar and beta = (1,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this subtracts scalar * A^T * x from y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor; its negation is converted to complex<float> and used as alpha
2827  template< typename VT1 // Type of the left-hand side target vector
2828  , typename VT2 // Type of the left-hand side vector operand
2829  , typename MT1 // Type of the right-hand side matrix operand
2830  , typename ST2 > // Type of the scalar value
2831  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2832  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2833  {
2834  using boost::numeric_cast;
2835 
      // All three element types are constrained to complex types with a float value_type.
2836  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2837  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2838  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2839  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2840  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2841  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2842 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2843  const int M ( numeric_cast<int>( A.rows() ) );
2844  const int N ( numeric_cast<int>( A.columns() ) );
2845  const int lda( numeric_cast<int>( A.spacing() ) );
2846  const complex<float> alpha( -scalar );
2847  const complex<float> beta ( 1.0F, 0.0F );
2848 
      // Negated alpha with beta = 1 turns the GEMV update into a subtraction.
2849  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2850  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2851  }
2852 #endif
2853  //**********************************************************************************************
2854 
2855  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2856 #if BLAZE_BLAS_MODE
2857 
// BLAS-based subtraction-assignment kernel for double precision complex operands.
// Selected through EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >; note that the
// scalar type ST2 is not part of the selection trait. The work is delegated to cblas_zgemv
// with CblasRowMajor/CblasTrans, alpha = -scalar and beta = (1,0). Under the BLAS GEMV
// contract (y := alpha*op(A)*x + beta*y) this subtracts scalar * A^T * x from y.
//   y      - target vector, updated in place through y.data()
//   x      - left-hand side vector operand (read through x.data())
//   A      - right-hand side matrix operand (read through A.data())
//   scalar - scaling factor; its negation is converted to complex<double> and used as alpha
2870  template< typename VT1 // Type of the left-hand side target vector
2871  , typename VT2 // Type of the left-hand side vector operand
2872  , typename MT1 // Type of the right-hand side matrix operand
2873  , typename ST2 > // Type of the scalar value
2874  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2875  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2876  {
2877  using boost::numeric_cast;
2878 
      // All three element types are constrained to complex types with a double value_type.
2879  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT1::ElementType );
2880  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename VT2::ElementType );
2881  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT1::ElementType );
2882  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2883  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2884  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2885 
      // CBLAS takes int dimensions; numeric_cast converts them with a range check.
      // The complex GEMV routines receive alpha and beta by address, hence the named locals.
2886  const int M ( numeric_cast<int>( A.rows() ) );
2887  const int N ( numeric_cast<int>( A.columns() ) );
2888  const int lda( numeric_cast<int>( A.spacing() ) );
2889  const complex<double> alpha( -scalar );
2890  const complex<double> beta ( 1.0, 0.0 );
2891 
      // Negated alpha with beta = 1 turns the GEMV update into a subtraction.
2892  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2893  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2894  }
2895 #endif
2896  //**********************************************************************************************
2897 
2898  //**Subtraction assignment to sparse vectors****************************************************
2899  // No special implementation for the subtraction assignment to sparse vectors.
2900  //**********************************************************************************************
2901 
2902  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of this scaled expression to a dense vector.
//
// lhs is the target dense vector, rhs the expression object to be multiplied into it.
// As far as this listing shows, the expression is first evaluated into a temporary of type
// ResultType and that temporary is then passed on to multAssign() together with ~lhs.
//
// NOTE(review): the embedded listing numbers jump 2916 -> 2918 -> 2921, so some statements of
// the original header are not reproduced in this listing; consult the header for the full body.
2914  template< typename VT1 > // Type of the target dense vector
2915  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2916  {
2918 
      // Compile-time constraint on ResultType::CompositeType (a reference type, per the macro name).
2921  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename ResultType::CompositeType );
2922 
      // Internal sanity check: target and expression must have the same number of elements.
2923  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2924 
      // Evaluate the expression once into a temporary, then reuse the multAssign() overload
      // that accepts the evaluated result.
2925  const ResultType tmp( rhs );
2926  multAssign( ~lhs, tmp );
2927  }
2928  //**********************************************************************************************
2929 
2930  //**Multiplication assignment to sparse vectors*******************************************************
2931  // No special implementation for the multiplication assignment to sparse vectors.
2932  //**********************************************************************************************
2933 
2934  //**Compile time checks*************************************************************************
2943  //**********************************************************************************************
2944 };
2946 //*************************************************************************************************
2947 
2948 
2949 
2950 
2951 //=================================================================================================
2952 //
2953 // GLOBAL BINARY ARITHMETIC OPERATORS
2954 //
2955 //=================================================================================================
2956 
2957 //*************************************************************************************************
// Global binary operator combining a dense vector (T1) with a dense matrix (T2), as described by
// the template parameter comments below.
//
// Throws std::invalid_argument when the size of the vector differs from the number of rows of
// the matrix; otherwise returns a TDVecDMatMultExpr<T1,T2> constructed from ~vec and ~mat.
// The return type is wrapped in DisableIf< IsMatMatMultExpr<T2>, ... >; presumably this removes
// the overload when T2 is itself a matrix/matrix multiplication expression -- confirm against
// the DisableIf documentation.
//
// NOTE(review): the declarator line (listing number 2991, between the return type and the
// opening brace) and listing line 2993 are absent from this listing. The body refers to the
// parameters as `vec` and `mat`; restore the missing lines from the original header.
2988 template< typename T1 // Type of the left-hand side dense vector
2989  , typename T2 > // Type of the right-hand side dense matrix
2990 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
2992 {
2994 
      // Reject operands whose dimensions do not fit: vector size must equal the matrix row count.
2995  if( (~vec).size() != (~mat).rows() )
2996  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2997 
      // Build the expression object from both operands.
2998  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
2999 }
3000 //*************************************************************************************************
3001 
3002 
3003 
3004 
3005 //=================================================================================================
3006 //
3007 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3008 //
3009 //=================================================================================================
3010 
3011 //*************************************************************************************************
// Restructuring variant of the global dense vector / dense matrix operator.
//
// The right-hand side operand exposes leftOperand() and rightOperand(); the body multiplies the
// vector with the left operand first and then multiplies that result with the right operand,
// i.e. it evaluates ( vec * L ) * R. The return type is taken from MultExprTrait<T1,T2> and is
// wrapped in EnableIf< IsMatMatMultExpr<T2>, ... >; presumably the overload only participates
// when T2 is a matrix/matrix multiplication expression -- confirm against EnableIf.
//
// NOTE(review): the declarator line (listing number 3028, between the return type and the
// opening brace) and listing line 3030 are absent from this listing. The body refers to the
// parameters as `vec` and `mat`; restore the missing lines from the original header.
3024 template< typename T1 // Type of the left-hand side dense vector
3025  , typename T2 // Type of the right-hand side dense matrix
3026  , bool SO > // Storage order of the right-hand side dense matrix
3027 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3029 {
3031 
      // Re-associate the product: vector times left operand first, then times right operand.
3032  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
3033 }
3034 //*************************************************************************************************
3035 
3036 } // namespace blaze
3037 
3038 #endif