All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
64 #include <blaze/system/BLAS.h>
66 #include <blaze/util/Assert.h>
67 #include <blaze/util/Complex.h>
73 #include <blaze/util/DisableIf.h>
74 #include <blaze/util/EnableIf.h>
76 #include <blaze/util/SelectType.h>
77 #include <blaze/util/Types.h>
83 
84 
85 namespace blaze {
86 
87 //=================================================================================================
88 //
89 // CLASS TDMATDVECMULTEXPR
90 //
91 //=================================================================================================
92 
93 //*************************************************************************************************
100 template< typename MT // Type of the left-hand side dense matrix
101  , typename VT > // Type of the right-hand side dense vector
102 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
103  , private MatVecMultExpr
104  , private Computation
105 {
106  private:
107  //**Type definitions****************************************************************************
108  typedef typename MT::ResultType MRT;
109  typedef typename VT::ResultType VRT;
110  typedef typename MRT::ElementType MET;
111  typedef typename VRT::ElementType VET;
112  typedef typename MT::CompositeType MCT;
113  typedef typename VT::CompositeType VCT;
114  //**********************************************************************************************
115 
116  //**********************************************************************************************
118  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124  enum { evaluateVector = IsComputation<VT>::value };
125  //**********************************************************************************************
126 
127  //**********************************************************************************************
129 
133  template< typename T1, typename T2, typename T3 >
134  struct UseSinglePrecisionKernel {
135  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
136  IsFloat<typename T1::ElementType>::value &&
137  IsFloat<typename T2::ElementType>::value &&
138  IsFloat<typename T3::ElementType>::value };
139  };
141  //**********************************************************************************************
142 
143  //**********************************************************************************************
145 
149  template< typename T1, typename T2, typename T3 >
150  struct UseDoublePrecisionKernel {
151  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
152  IsDouble<typename T1::ElementType>::value &&
153  IsDouble<typename T2::ElementType>::value &&
154  IsDouble<typename T3::ElementType>::value };
155  };
157  //**********************************************************************************************
158 
159  //**********************************************************************************************
161 
165  template< typename T1, typename T2, typename T3 >
166  struct UseSinglePrecisionComplexKernel {
167  typedef complex<float> Type;
168  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
169  IsSame<typename T1::ElementType,Type>::value &&
170  IsSame<typename T2::ElementType,Type>::value &&
171  IsSame<typename T3::ElementType,Type>::value };
172  };
174  //**********************************************************************************************
175 
176  //**********************************************************************************************
178 
182  template< typename T1, typename T2, typename T3 >
183  struct UseDoublePrecisionComplexKernel {
184  typedef complex<double> Type;
185  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
186  IsSame<typename T1::ElementType,Type>::value &&
187  IsSame<typename T2::ElementType,Type>::value &&
188  IsSame<typename T3::ElementType,Type>::value };
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
198  template< typename T1, typename T2, typename T3 >
199  struct UseDefaultKernel {
200  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
201  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
202  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
203  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
214  template< typename T1, typename T2, typename T3 >
215  struct UseVectorizedDefaultKernel {
216  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
217  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
218  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
219  IntrinsicTrait<typename T1::ElementType>::addition &&
220  IntrinsicTrait<typename T1::ElementType>::multiplication };
221  };
223  //**********************************************************************************************
224 
225  public:
226  //**Type definitions****************************************************************************
232  typedef const ElementType ReturnType;
233  typedef const ResultType CompositeType;
234 
236  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
237 
239  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
240 
243 
246  //**********************************************************************************************
247 
248  //**Compilation flags***************************************************************************
//! Compilation flag: this expression itself is never flagged as vectorizable.
enum { vectorizable = 0 };

//! Compilation flag: this expression is never flagged as SMP-assignable.
enum { smpAssignable = 0 };
254  //**********************************************************************************************
255 
256  //**Constructor*********************************************************************************
262  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
263  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
264  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
265  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
266  {
267  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
268  }
269  //**********************************************************************************************
270 
271  //**Subscript operator**************************************************************************
277  inline ReturnType operator[]( size_t index ) const {
278  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
279 
280  ElementType res;
281 
282  if( mat_.columns() != 0UL ) {
283  res = mat_(index,0UL) * vec_[0UL];
284  for( size_t j=1UL; j<end_; j+=2UL ) {
285  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
286  }
287  if( end_ < mat_.columns() ) {
288  res += mat_(index,end_) * vec_[end_];
289  }
290  }
291  else {
292  reset( res );
293  }
294 
295  return res;
296  }
297  //**********************************************************************************************
298 
299  //**Size function*******************************************************************************
304  inline size_t size() const {
305  return mat_.rows();
306  }
307  //**********************************************************************************************
308 
309  //**Left operand access*************************************************************************
314  inline LeftOperand leftOperand() const {
315  return mat_;
316  }
317  //**********************************************************************************************
318 
319  //**Right operand access************************************************************************
324  inline RightOperand rightOperand() const {
325  return vec_;
326  }
327  //**********************************************************************************************
328 
329  //**********************************************************************************************
335  template< typename T >
336  inline bool canAlias( const T* alias ) const {
337  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
338  }
339  //**********************************************************************************************
340 
341  //**********************************************************************************************
347  template< typename T >
348  inline bool isAliased( const T* alias ) const {
349  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
350  }
351  //**********************************************************************************************
352 
353  private:
354  //**Member variables****************************************************************************
357  const size_t end_;
358  //**********************************************************************************************
359 
360  //**Assignment to dense vectors*****************************************************************
373  template< typename VT1 > // Type of the target dense vector
374  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
375  {
377 
378  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
379 
380  if( rhs.mat_.rows() == 0UL ) {
381  return;
382  }
383  else if( rhs.mat_.columns() == 0UL ) {
384  reset( ~lhs );
385  return;
386  }
387 
388  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
389  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
390 
391  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
392  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
393  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
394  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
395 
396  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
397  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
398  TDMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
399  else
400  TDMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
401  }
403  //**********************************************************************************************
404 
405  //**Default assignment to dense vectors*********************************************************
419  template< typename VT1 // Type of the left-hand side target vector
420  , typename MT1 // Type of the left-hand side matrix operand
421  , typename VT2 > // Type of the right-hand side vector operand
422  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
423  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
424  {
425  const size_t M( A.rows() );
426  const size_t N( A.columns() );
427 
428  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
429  const size_t iend( M & size_t(-2) );
430 
431  for( size_t i=0UL; i<M; ++i ) {
432  y[i] = x[0UL] * A(i,0UL);
433  }
434  for( size_t j=1UL; j<N; ++j ) {
435  for( size_t i=0UL; i<iend; i+=2UL ) {
436  y[i ] += x[j] * A(i ,j);
437  y[i+1UL] += x[j] * A(i+1UL,j);
438  }
439  if( iend < M ) {
440  y[iend] += x[j] * A(iend,j);
441  }
442  }
443  }
445  //**********************************************************************************************
446 
447  //**Vectorized default assignment to dense vectors**********************************************
461  template< typename VT1 // Type of the left-hand side target vector
462  , typename MT1 // Type of the left-hand side matrix operand
463  , typename VT2 > // Type of the right-hand side vector operand
464  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
465  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
466  {
467  typedef IntrinsicTrait<ElementType> IT;
468 
469  const size_t M( A.rows() );
470  const size_t N( A.columns() );
471 
472  size_t i( 0UL );
473 
474  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
475  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
476  for( size_t j=0UL; j<N; ++j ) {
477  const IntrinsicType x1( set( x[j] ) );
478  xmm1 = xmm1 + A.load(i ,j) * x1;
479  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
480  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
481  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
482  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
483  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
484  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
485  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
486  }
487  y.store( i , xmm1 );
488  y.store( i+IT::size , xmm2 );
489  y.store( i+IT::size*2UL, xmm3 );
490  y.store( i+IT::size*3UL, xmm4 );
491  y.store( i+IT::size*4UL, xmm5 );
492  y.store( i+IT::size*5UL, xmm6 );
493  y.store( i+IT::size*6UL, xmm7 );
494  y.store( i+IT::size*7UL, xmm8 );
495  }
496  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
497  IntrinsicType xmm1, xmm2, xmm3, xmm4;
498  for( size_t j=0UL; j<N; ++j ) {
499  const IntrinsicType x1( set( x[j] ) );
500  xmm1 = xmm1 + A.load(i ,j) * x1;
501  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
502  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
503  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
504  }
505  y.store( i , xmm1 );
506  y.store( i+IT::size , xmm2 );
507  y.store( i+IT::size*2UL, xmm3 );
508  y.store( i+IT::size*3UL, xmm4 );
509  }
510  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
511  IntrinsicType xmm1, xmm2, xmm3;
512  for( size_t j=0UL; j<N; ++j ) {
513  const IntrinsicType x1( set( x[j] ) );
514  xmm1 = xmm1 + A.load(i ,j) * x1;
515  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
516  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
517  }
518  y.store( i , xmm1 );
519  y.store( i+IT::size , xmm2 );
520  y.store( i+IT::size*2UL, xmm3 );
521  }
522  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
523  IntrinsicType xmm1, xmm2;
524  for( size_t j=0UL; j<N; ++j ) {
525  const IntrinsicType x1( set( x[j] ) );
526  xmm1 = xmm1 + A.load(i ,j) * x1;
527  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
528  }
529  y.store( i , xmm1 );
530  y.store( i+IT::size, xmm2 );
531  }
532  if( i < M ) {
533  IntrinsicType xmm1;
534  for( size_t j=0UL; j<N; ++j ) {
535  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
536  }
537  y.store( i, xmm1 );
538  }
539  }
541  //**********************************************************************************************
542 
543  //**BLAS-based assignment to dense vectors (default)********************************************
557  template< typename VT1 // Type of the left-hand side target vector
558  , typename MT1 // Type of the left-hand side matrix operand
559  , typename VT2 > // Type of the right-hand side vector operand
560  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
561  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
562  {
563  selectDefaultAssignKernel( y, A, x );
564  }
566  //**********************************************************************************************
567 
568  //**BLAS-based assignment to dense vectors (single precision)***********************************
569 #if BLAZE_BLAS_MODE
570 
583  template< typename VT1 // Type of the left-hand side target vector
584  , typename MT1 // Type of the left-hand side matrix operand
585  , typename VT2 > // Type of the right-hand side vector operand
586  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
587  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
588  {
589  using boost::numeric_cast;
590 
594 
595  const int M ( numeric_cast<int>( A.rows() ) );
596  const int N ( numeric_cast<int>( A.columns() ) );
597  const int lda( numeric_cast<int>( A.spacing() ) );
598 
599  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
600  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
601  }
603 #endif
604  //**********************************************************************************************
605 
606  //**BLAS-based assignment to dense vectors (double precision)***********************************
607 #if BLAZE_BLAS_MODE
608 
621  template< typename VT1 // Type of the left-hand side target vector
622  , typename MT1 // Type of the left-hand side matrix operand
623  , typename VT2 > // Type of the right-hand side vector operand
624  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
625  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
626  {
627  using boost::numeric_cast;
628 
632 
633  const int M ( numeric_cast<int>( A.rows() ) );
634  const int N ( numeric_cast<int>( A.columns() ) );
635  const int lda( numeric_cast<int>( A.spacing() ) );
636 
637  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
638  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
639  }
641 #endif
642  //**********************************************************************************************
643 
644  //**BLAS-based assignment to dense vectors (single precision complex)***************************
645 #if BLAZE_BLAS_MODE
646 
659  template< typename VT1 // Type of the left-hand side target vector
660  , typename MT1 // Type of the left-hand side matrix operand
661  , typename VT2 > // Type of the right-hand side vector operand
662  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
663  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
664  {
665  using boost::numeric_cast;
666 
670  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
671  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
672  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
673 
674  const int M ( numeric_cast<int>( A.rows() ) );
675  const int N ( numeric_cast<int>( A.columns() ) );
676  const int lda( numeric_cast<int>( A.spacing() ) );
677  const complex<float> alpha( 1.0F, 0.0F );
678  const complex<float> beta ( 0.0F, 0.0F );
679 
680  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
681  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
682  }
684 #endif
685  //**********************************************************************************************
686 
687  //**BLAS-based assignment to dense vectors (double precision complex)***************************
688 #if BLAZE_BLAS_MODE
689 
702  template< typename VT1 // Type of the left-hand side target vector
703  , typename MT1 // Type of the left-hand side matrix operand
704  , typename VT2 > // Type of the right-hand side vector operand
705  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
706  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
707  {
708  using boost::numeric_cast;
709 
713  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
714  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
715  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
716 
717  const int M ( numeric_cast<int>( A.rows() ) );
718  const int N ( numeric_cast<int>( A.columns() ) );
719  const int lda( numeric_cast<int>( A.spacing() ) );
720  const complex<double> alpha( 1.0, 0.0 );
721  const complex<double> beta ( 0.0, 0.0 );
722 
723  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
724  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
725  }
727 #endif
728  //**********************************************************************************************
729 
730  //**Assignment to sparse vectors****************************************************************
743  template< typename VT1 > // Type of the target sparse vector
744  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
745  {
747 
751 
752  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
753 
754  const ResultType tmp( rhs );
755  assign( ~lhs, tmp );
756  }
758  //**********************************************************************************************
759 
760  //**Addition assignment to dense vectors********************************************************
773  template< typename VT1 > // Type of the target dense vector
774  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
775  {
777 
778  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
779 
780  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
781  return;
782  }
783 
784  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
785  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
786 
787  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
788  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
789  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
790  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
791 
792  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
793  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
794  TDMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
795  else
796  TDMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
797  }
799  //**********************************************************************************************
800 
801  //**Default addition assignment to dense vectors************************************************
815  template< typename VT1 // Type of the left-hand side target vector
816  , typename MT1 // Type of the left-hand side matrix operand
817  , typename VT2 > // Type of the right-hand side vector operand
818  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
819  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
820  {
821  const size_t M( A.rows() );
822  const size_t N( A.columns() );
823 
824  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
825  const size_t iend( M & size_t(-2) );
826 
827  for( size_t j=0UL; j<N; ++j ) {
828  for( size_t i=0UL; i<iend; i+=2UL ) {
829  y[i ] += x[j] * A(i ,j);
830  y[i+1UL] += x[j] * A(i+1UL,j);
831  }
832  if( iend < M ) {
833  y[iend] += x[j] * A(iend,j);
834  }
835  }
836  }
838  //**********************************************************************************************
839 
840  //**Vectorized default addition assignment to dense vectors*************************************
854  template< typename VT1 // Type of the left-hand side target vector
855  , typename MT1 // Type of the left-hand side matrix operand
856  , typename VT2 > // Type of the right-hand side vector operand
857  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
858  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
859  {
860  typedef IntrinsicTrait<ElementType> IT;
861 
862  const size_t M( A.rows() );
863  const size_t N( A.columns() );
864 
865  size_t i( 0UL );
866 
867  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
868  IntrinsicType xmm1( y.load(i ) );
869  IntrinsicType xmm2( y.load(i+IT::size ) );
870  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
871  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
872  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
873  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
874  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
875  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
876  for( size_t j=0UL; j<N; ++j ) {
877  const IntrinsicType x1( set( x[j] ) );
878  xmm1 = xmm1 + A.load(i ,j) * x1;
879  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
880  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
881  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
882  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
883  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
884  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
885  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
886  }
887  y.store( i , xmm1 );
888  y.store( i+IT::size , xmm2 );
889  y.store( i+IT::size*2UL, xmm3 );
890  y.store( i+IT::size*3UL, xmm4 );
891  y.store( i+IT::size*4UL, xmm5 );
892  y.store( i+IT::size*5UL, xmm6 );
893  y.store( i+IT::size*6UL, xmm7 );
894  y.store( i+IT::size*7UL, xmm8 );
895  }
896  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
897  IntrinsicType xmm1( y.load(i ) );
898  IntrinsicType xmm2( y.load(i+IT::size ) );
899  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
900  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
901  for( size_t j=0UL; j<N; ++j ) {
902  const IntrinsicType x1( set( x[j] ) );
903  xmm1 = xmm1 + A.load(i ,j) * x1;
904  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
905  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
906  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
907  }
908  y.store( i , xmm1 );
909  y.store( i+IT::size , xmm2 );
910  y.store( i+IT::size*2UL, xmm3 );
911  y.store( i+IT::size*3UL, xmm4 );
912  }
913  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
914  IntrinsicType xmm1( y.load(i ) );
915  IntrinsicType xmm2( y.load(i+IT::size ) );
916  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
917  for( size_t j=0UL; j<N; ++j ) {
918  const IntrinsicType x1( set( x[j] ) );
919  xmm1 = xmm1 + A.load(i ,j) * x1;
920  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
921  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
922  }
923  y.store( i , xmm1 );
924  y.store( i+IT::size , xmm2 );
925  y.store( i+IT::size*2UL, xmm3 );
926  }
927  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
928  IntrinsicType xmm1( y.load(i ) );
929  IntrinsicType xmm2( y.load(i+IT::size) );
930  for( size_t j=0UL; j<N; ++j ) {
931  const IntrinsicType x1( set( x[j] ) );
932  xmm1 = xmm1 + A.load(i ,j) * x1;
933  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
934  }
935  y.store( i , xmm1 );
936  y.store( i+IT::size, xmm2 );
937  }
938  if( i < M ) {
939  IntrinsicType xmm1( y.load(i) );
940  for( size_t j=0UL; j<N; ++j ) {
941  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
942  }
943  y.store( i, xmm1 );
944  }
945  }
947  //**********************************************************************************************
948 
949  //**BLAS-based addition assignment to dense vectors (default)***********************************
963  template< typename VT1 // Type of the left-hand side target vector
964  , typename MT1 // Type of the left-hand side matrix operand
965  , typename VT2 > // Type of the right-hand side vector operand
966  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
967  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
968  {
969  selectDefaultAddAssignKernel( y, A, x );
970  }
972  //**********************************************************************************************
973 
974  //**BLAS-based addition assignment to dense vectors (single precision)**************************
975 #if BLAZE_BLAS_MODE
976 
989  template< typename VT1 // Type of the left-hand side target vector
990  , typename MT1 // Type of the left-hand side matrix operand
991  , typename VT2 > // Type of the right-hand side vector operand
992  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
993  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
994  {
995  using boost::numeric_cast;
996 
1000 
1001  const int M ( numeric_cast<int>( A.rows() ) );
1002  const int N ( numeric_cast<int>( A.columns() ) );
1003  const int lda( numeric_cast<int>( A.spacing() ) );
1004 
1005  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
1006  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1007  }
1009 #endif
1010  //**********************************************************************************************
1011 
1012  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1013 #if BLAZE_BLAS_MODE
1014 
1027  template< typename VT1 // Type of the left-hand side target vector
1028  , typename MT1 // Type of the left-hand side matrix operand
1029  , typename VT2 > // Type of the right-hand side vector operand
1030  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1031  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1032  {
1033  using boost::numeric_cast;
1034 
1038 
1039  const int M ( numeric_cast<int>( A.rows() ) );
1040  const int N ( numeric_cast<int>( A.columns() ) );
1041  const int lda( numeric_cast<int>( A.spacing() ) );
1042 
1043  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1044  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1045  }
1047 #endif
1048  //**********************************************************************************************
1049 
1050  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1051 #if BLAZE_BLAS_MODE
1052 
1065  template< typename VT1 // Type of the left-hand side target vector
1066  , typename MT1 // Type of the left-hand side matrix operand
1067  , typename VT2 > // Type of the right-hand side vector operand
1068  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1069  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1070  {
1071  using boost::numeric_cast;
1072 
1076  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1077  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1078  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1079 
1080  const int M ( numeric_cast<int>( A.rows() ) );
1081  const int N ( numeric_cast<int>( A.columns() ) );
1082  const int lda( numeric_cast<int>( A.spacing() ) );
1083  const complex<float> alpha( 1.0F, 0.0F );
1084  const complex<float> beta ( 1.0F, 0.0F );
1085 
1086  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1087  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1088  }
1090 #endif
1091  //**********************************************************************************************
1092 
1093  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1094 #if BLAZE_BLAS_MODE
1095 
// BLAS kernel for the addition assignment y += A * x on double precision complex operands.
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
// Calls cblas_zgemv in column-major, non-transposed mode with alpha = (1,0) and beta = (1,0),
// i.e. the gemv update y := alpha*A*x + beta*y keeps the previous content of y and adds A*x.
//   y  Target vector; written through y.data().
//   A  Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x  Vector operand; read through x.data().
1108  template< typename VT1 // Type of the left-hand side target vector
1109  , typename MT1 // Type of the left-hand side matrix operand
1110  , typename VT2 > // Type of the right-hand side vector operand
1111  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1112  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1113  {
1114  using boost::numeric_cast;
1115 
      // Compile-time constraints on the value_type of each operand's element type.
1119  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1120  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1121  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1122 
      // The CBLAS interface takes int dimensions; numeric_cast performs the conversion.
      // A.spacing() is passed as the leading dimension (lda) of the column-major matrix.
1123  const int M ( numeric_cast<int>( A.rows() ) );
1124  const int N ( numeric_cast<int>( A.columns() ) );
1125  const int lda( numeric_cast<int>( A.spacing() ) );
1126  const complex<double> alpha( 1.0, 0.0 );
1127  const complex<double> beta ( 1.0, 0.0 );
1128 
      // Both vectors are passed with an increment of 1.
1129  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1130  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1131  }
1133 #endif
1134  //**********************************************************************************************
1135 
1136  //**Addition assignment to sparse vectors*******************************************************
1137  // No special implementation for the addition assignment to sparse vectors.
1138  //**********************************************************************************************
1139 
1140  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of a transpose dense matrix-dense vector product to a dense vector
// (lhs -= A * x). Evaluates both operands of rhs and dispatches to either the default or the
// BLAS-based subtraction kernel.
//   lhs  Target dense vector; its size must equal rhs.size() (internal assertion).
//   rhs  The multiplication expression to be subtracted.
1153  template< typename VT1 > // Type of the target dense vector
1154  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1155  {
1157 
1158  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1159 
      // Nothing to subtract if the matrix has no rows or no columns.
1160  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1161  return;
1162  }
1163 
1164  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1165  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1166 
1167  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1168  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1169  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1170  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1171 
      // The default kernel is used when the matrix operand is a computation that was not
      // evaluated, or when the element count is below TDMATDVECMULT_THRESHOLD; otherwise the
      // BLAS kernel selection is used.
1172  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1173  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1174  TDMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1175  else
1176  TDMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1177  }
1179  //**********************************************************************************************
1180 
1181  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) kernel for the subtraction assignment y -= A * x.
// Enabled (via DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold.
// Traverses A column by column and subtracts x[j] * A(i,j) from y[i]; the row loop is
// unrolled by two, with a single trailing update when the number of rows is odd.
//   y  Target vector, updated in place.
//   A  Matrix operand, accessed through A(i,j).
//   x  Vector operand, accessed through x[j].
1195  template< typename VT1 // Type of the left-hand side target vector
1196  , typename MT1 // Type of the left-hand side matrix operand
1197  , typename VT2 > // Type of the right-hand side vector operand
1198  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1199  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1200  {
1201  const size_t M( A.rows() );
1202  const size_t N( A.columns() );
1203 
      // iend is M rounded down to the next even number (clears the lowest bit).
1204  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1205  const size_t iend( M & size_t(-2) );
1206 
1207  for( size_t j=0UL; j<N; ++j ) {
1208  for( size_t i=0UL; i<iend; i+=2UL ) {
1209  y[i ] -= x[j] * A(i ,j);
1210  y[i+1UL] -= x[j] * A(i+1UL,j);
1211  }
      // Remaining last row for odd M.
1212  if( iend < M ) {
1213  y[iend] -= x[j] * A(iend,j);
1214  }
1215  }
1216  }
1218  //**********************************************************************************************
1219 
1220  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment y -= A * x.
// Enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the current values of y are loaded, A.load(i,j) times
// the broadcast value set( x[j] ) is subtracted for every column j, and the block is stored
// back into y.
// NOTE(review): each loop only checks that the start of the block's last intrinsic vector is
// below M, so the last load/store of a block can extend past M when M is not a multiple of
// IT::size; this presumably relies on padded storage of y and A - confirm.
//   y  Target vector, accessed through load()/store().
//   A  Matrix operand, accessed through load(i,j).
//   x  Vector operand, accessed through x[j].
1234  template< typename VT1 // Type of the left-hand side target vector
1235  , typename MT1 // Type of the left-hand side matrix operand
1236  , typename VT2 > // Type of the right-hand side vector operand
1237  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1238  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1239  {
1240  typedef IntrinsicTrait<ElementType> IT;
1241 
1242  const size_t M( A.rows() );
1243  const size_t N( A.columns() );
1244 
      // Row index shared by all of the following loops; each loop continues where the
      // previous one stopped.
1245  size_t i( 0UL );
1246 
      // Blocks of eight intrinsic vectors.
1247  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1248  IntrinsicType xmm1( y.load(i ) );
1249  IntrinsicType xmm2( y.load(i+IT::size ) );
1250  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1251  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1252  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1253  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1254  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1255  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1256  for( size_t j=0UL; j<N; ++j ) {
1257  const IntrinsicType x1( set( x[j] ) );
1258  xmm1 = xmm1 - A.load(i ,j) * x1;
1259  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1260  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1261  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1262  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1263  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1264  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1265  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1266  }
1267  y.store( i , xmm1 );
1268  y.store( i+IT::size , xmm2 );
1269  y.store( i+IT::size*2UL, xmm3 );
1270  y.store( i+IT::size*3UL, xmm4 );
1271  y.store( i+IT::size*4UL, xmm5 );
1272  y.store( i+IT::size*5UL, xmm6 );
1273  y.store( i+IT::size*6UL, xmm7 );
1274  y.store( i+IT::size*7UL, xmm8 );
1275  }
      // Blocks of four intrinsic vectors.
1276  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1277  IntrinsicType xmm1( y.load(i ) );
1278  IntrinsicType xmm2( y.load(i+IT::size ) );
1279  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1280  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1281  for( size_t j=0UL; j<N; ++j ) {
1282  const IntrinsicType x1( set( x[j] ) );
1283  xmm1 = xmm1 - A.load(i ,j) * x1;
1284  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1285  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1286  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1287  }
1288  y.store( i , xmm1 );
1289  y.store( i+IT::size , xmm2 );
1290  y.store( i+IT::size*2UL, xmm3 );
1291  y.store( i+IT::size*3UL, xmm4 );
1292  }
      // Blocks of three intrinsic vectors.
1293  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1294  IntrinsicType xmm1( y.load(i ) );
1295  IntrinsicType xmm2( y.load(i+IT::size ) );
1296  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1297  for( size_t j=0UL; j<N; ++j ) {
1298  const IntrinsicType x1( set( x[j] ) );
1299  xmm1 = xmm1 - A.load(i ,j) * x1;
1300  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1301  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1302  }
1303  y.store( i , xmm1 );
1304  y.store( i+IT::size , xmm2 );
1305  y.store( i+IT::size*2UL, xmm3 );
1306  }
      // Blocks of two intrinsic vectors.
1307  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1308  IntrinsicType xmm1( y.load(i ) );
1309  IntrinsicType xmm2( y.load(i+IT::size) );
1310  for( size_t j=0UL; j<N; ++j ) {
1311  const IntrinsicType x1( set( x[j] ) );
1312  xmm1 = xmm1 - A.load(i ,j) * x1;
1313  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
1314  }
1315  y.store( i , xmm1 );
1316  y.store( i+IT::size, xmm2 );
1317  }
      // At most one intrinsic vector is left at this point.
1318  if( i < M ) {
1319  IntrinsicType xmm1( y.load(i) );
1320  for( size_t j=0UL; j<N; ++j ) {
1321  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
1322  }
1323  y.store( i, xmm1 );
1324  }
1325  }
1327  //**********************************************************************************************
1328 
1329  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback of the BLAS-based subtraction assignment y -= A * x.
// Enabled (via EnableIf) when UseDefaultKernel<VT1,MT1,VT2> holds; it simply forwards to
// selectDefaultSubAssignKernel() with the same arguments.
1343  template< typename VT1 // Type of the left-hand side target vector
1344  , typename MT1 // Type of the left-hand side matrix operand
1345  , typename VT2 > // Type of the right-hand side vector operand
1346  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1347  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1348  {
1349  selectDefaultSubAssignKernel( y, A, x );
1350  }
1352  //**********************************************************************************************
1353 
1354  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1355 #if BLAZE_BLAS_MODE
1356 
// BLAS kernel for the subtraction assignment y -= A * x on single precision operands.
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<VT1,MT1,VT2> holds.
// Calls cblas_sgemv in column-major, non-transposed mode with alpha = -1 and beta = 1,
// i.e. the gemv update y := alpha*A*x + beta*y subtracts A*x from the current y.
//   y  Target vector; written through y.data().
//   A  Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x  Vector operand; read through x.data().
1369  template< typename VT1 // Type of the left-hand side target vector
1370  , typename MT1 // Type of the left-hand side matrix operand
1371  , typename VT2 > // Type of the right-hand side vector operand
1372  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1373  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1374  {
1375  using boost::numeric_cast;
1376 
1380 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
1381  const int M ( numeric_cast<int>( A.rows() ) );
1382  const int N ( numeric_cast<int>( A.columns() ) );
1383  const int lda( numeric_cast<int>( A.spacing() ) );
1384 
1385  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1386  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1387  }
1389 #endif
1390  //**********************************************************************************************
1391 
1392  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1393 #if BLAZE_BLAS_MODE
1394 
// BLAS kernel for the subtraction assignment y -= A * x on double precision operands.
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<VT1,MT1,VT2> holds.
// Calls cblas_dgemv in column-major, non-transposed mode with alpha = -1 and beta = 1,
// i.e. the gemv update y := alpha*A*x + beta*y subtracts A*x from the current y.
//   y  Target vector; written through y.data().
//   A  Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x  Vector operand; read through x.data().
1407  template< typename VT1 // Type of the left-hand side target vector
1408  , typename MT1 // Type of the left-hand side matrix operand
1409  , typename VT2 > // Type of the right-hand side vector operand
1410  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1411  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1412  {
1413  using boost::numeric_cast;
1414 
1418 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
1419  const int M ( numeric_cast<int>( A.rows() ) );
1420  const int N ( numeric_cast<int>( A.columns() ) );
1421  const int lda( numeric_cast<int>( A.spacing() ) );
1422 
1423  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1424  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1425  }
1427 #endif
1428  //**********************************************************************************************
1429 
1430  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1431 #if BLAZE_BLAS_MODE
1432 
// BLAS kernel for the subtraction assignment y -= A * x on single precision complex operands.
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
// Calls cblas_cgemv in column-major, non-transposed mode with alpha = (-1,0) and
// beta = (1,0), i.e. the gemv update y := alpha*A*x + beta*y subtracts A*x from y.
//   y  Target vector; written through y.data().
//   A  Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x  Vector operand; read through x.data().
1445  template< typename VT1 // Type of the left-hand side target vector
1446  , typename MT1 // Type of the left-hand side matrix operand
1447  , typename VT2 > // Type of the right-hand side vector operand
1448  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1449  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1450  {
1451  using boost::numeric_cast;
1452 
      // Compile-time constraints on the value_type of each operand's element type.
1456  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1457  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1458  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1459 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
1460  const int M ( numeric_cast<int>( A.rows() ) );
1461  const int N ( numeric_cast<int>( A.columns() ) );
1462  const int lda( numeric_cast<int>( A.spacing() ) );
1463  const complex<float> alpha( -1.0F, 0.0F );
1464  const complex<float> beta ( 1.0F, 0.0F );
1465 
      // The complex scaling factors are passed by address.
1466  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1467  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1468  }
1470 #endif
1471  //**********************************************************************************************
1472 
1473  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1474 #if BLAZE_BLAS_MODE
1475 
// BLAS kernel for the subtraction assignment y -= A * x on double precision complex operands.
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
// Calls cblas_zgemv in column-major, non-transposed mode with alpha = (-1,0) and
// beta = (1,0), i.e. the gemv update y := alpha*A*x + beta*y subtracts A*x from y.
//   y  Target vector; written through y.data().
//   A  Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x  Vector operand; read through x.data().
1488  template< typename VT1 // Type of the left-hand side target vector
1489  , typename MT1 // Type of the left-hand side matrix operand
1490  , typename VT2 > // Type of the right-hand side vector operand
1491  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1492  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1493  {
1494  using boost::numeric_cast;
1495 
      // Compile-time constraints on the value_type of each operand's element type.
1499  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1500  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1501  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1502 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
1503  const int M ( numeric_cast<int>( A.rows() ) );
1504  const int N ( numeric_cast<int>( A.columns() ) );
1505  const int lda( numeric_cast<int>( A.spacing() ) );
1506  const complex<double> alpha( -1.0, 0.0 );
1507  const complex<double> beta ( 1.0, 0.0 );
1508 
      // The complex scaling factors are passed by address.
1509  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1510  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1511  }
1513 #endif
1514  //**********************************************************************************************
1515 
1516  //**Subtraction assignment to sparse vectors****************************************************
1517  // No special implementation for the subtraction assignment to sparse vectors.
1518  //**********************************************************************************************
1519 
1520  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a transpose dense matrix-dense vector product to a dense
// vector. The expression is first evaluated into a temporary of type ResultType, which is
// then passed on to multAssign() for the target vector.
//   lhs  Target dense vector; its size must equal rhs.size() (internal assertion).
//   rhs  The multiplication expression to be multiplied into lhs.
1533  template< typename VT1 > // Type of the target dense vector
1534  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1535  {
1537 
1541 
1542  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1543 
      // Evaluate the whole expression once before combining it with lhs.
1544  const ResultType tmp( rhs );
1545  multAssign( ~lhs, tmp );
1546  }
1548  //**********************************************************************************************
1549 
1550  //**Multiplication assignment to sparse vectors*************************************************
1551  // No special implementation for the multiplication assignment to sparse vectors.
1552  //**********************************************************************************************
1553 
1554  //**Compile time checks*************************************************************************
1561  //**********************************************************************************************
1562 };
1563 //*************************************************************************************************
1564 
1565 
1566 
1567 
1568 //=================================================================================================
1569 //
1570 // DVECSCALARMULTEXPR SPECIALIZATION
1571 //
1572 //=================================================================================================
1573 
1574 //*************************************************************************************************
1583 template< typename MT // Type of the left-hand side dense matrix
1584  , typename VT // Type of the right-hand side dense vector
1585  , typename ST > // Type of the side scalar value
1586 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1587  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1588  , private VecScalarMultExpr
1589  , private Computation
1590 {
1591  private:
1592  //**Type definitions****************************************************************************
// Private shorthand types for the wrapped multiplication expression and its operands.
// MVM: the matrix/vector multiplication expression that is scaled.
1593  typedef TDMatDVecMultExpr<MT,VT> MVM;
// RES: result type of the multiplication expression.
1594  typedef typename MVM::ResultType RES;
// MRT / VRT: result types of the matrix operand and the vector operand.
1595  typedef typename MT::ResultType MRT;
1596  typedef typename VT::ResultType VRT;
// MET / VET: element types of the matrix result type and the vector result type.
1597  typedef typename MRT::ElementType MET;
1598  typedef typename VRT::ElementType VET;
// MCT / VCT: composite types of the matrix operand and the vector operand.
1599  typedef typename MT::CompositeType MCT;
1600  typedef typename VT::CompositeType VCT;
1601  //**********************************************************************************************
1602 
1603  //**********************************************************************************************
// Compile-time switch for the evaluation of the matrix operand. It is set when MT is a
// computation, is not vectorizable, the vector and matrix element types are identical and
// that element type is BLAS compatible. When set, LT is 'const MRT' instead of MCT.
1605  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
1606  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1607  //**********************************************************************************************
1608 
1609  //**********************************************************************************************
// Compile-time switch for the evaluation of the vector operand. It is set when VT is a
// computation. When set, RT is 'const VRT' instead of VCT.
1611  enum { evaluateVector = IsComputation<VT>::value };
1612  //**********************************************************************************************
1613 
1614  //**********************************************************************************************
1616 
// Helper trait for the selection of the single precision BLAS kernel.
// 'value' is set when T1, T2 and T3 are all vectorizable, their element types all satisfy
// IsFloat, and the scalar type T4 is not a complex type.
1619  template< typename T1, typename T2, typename T3, typename T4 >
1620  struct UseSinglePrecisionKernel {
1621  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1622  IsFloat<typename T1::ElementType>::value &&
1623  IsFloat<typename T2::ElementType>::value &&
1624  IsFloat<typename T3::ElementType>::value &&
1625  !IsComplex<T4>::value };
1626  };
1627  //**********************************************************************************************
1628 
1629  //**********************************************************************************************
1631 
// Helper trait for the selection of the double precision BLAS kernel.
// 'value' is set when T1, T2 and T3 are all vectorizable, their element types all satisfy
// IsDouble, and the scalar type T4 is not a complex type.
1634  template< typename T1, typename T2, typename T3, typename T4 >
1635  struct UseDoublePrecisionKernel {
1636  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1637  IsDouble<typename T1::ElementType>::value &&
1638  IsDouble<typename T2::ElementType>::value &&
1639  IsDouble<typename T3::ElementType>::value &&
1640  !IsComplex<T4>::value };
1641  };
1642  //**********************************************************************************************
1643 
1644  //**********************************************************************************************
1646 
// Helper trait for the selection of the single precision complex BLAS kernel.
// 'value' is set when T1, T2 and T3 are all vectorizable and each of their element types is
// exactly complex<float>. The scalar type is not part of this condition.
1649  template< typename T1, typename T2, typename T3 >
1650  struct UseSinglePrecisionComplexKernel {
1651  typedef complex<float> Type;
1652  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1653  IsSame<typename T1::ElementType,Type>::value &&
1654  IsSame<typename T2::ElementType,Type>::value &&
1655  IsSame<typename T3::ElementType,Type>::value };
1656  };
1657  //**********************************************************************************************
1658 
1659  //**********************************************************************************************
1661 
// Helper trait for the selection of the double precision complex BLAS kernel.
// 'value' is set when T1, T2 and T3 are all vectorizable and each of their element types is
// exactly complex<double>. The scalar type is not part of this condition.
1664  template< typename T1, typename T2, typename T3 >
1665  struct UseDoublePrecisionComplexKernel {
1666  typedef complex<double> Type;
1667  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1668  IsSame<typename T1::ElementType,Type>::value &&
1669  IsSame<typename T2::ElementType,Type>::value &&
1670  IsSame<typename T3::ElementType,Type>::value };
1671  };
1672  //**********************************************************************************************
1673 
1674  //**********************************************************************************************
1676 
// Helper trait for the selection of the default (non-BLAS) kernel.
// 'value' is set when BLAZE_BLAS_MODE evaluates to false, or when none of the four BLAS
// kernel traits (single, double, single complex, double complex) applies to the given types.
1678  template< typename T1, typename T2, typename T3, typename T4 >
1679  struct UseDefaultKernel {
1680  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1681  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1682  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1683  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1684  };
1685  //**********************************************************************************************
1686 
1687  //**********************************************************************************************
1689 
// Helper trait for the selection of the vectorized default kernel.
// 'value' is set when T1, T2 and T3 are all vectorizable, the element types of T1, T2 and T3
// and the scalar type T4 are all the same type, and IntrinsicTrait of that element type
// reports both 'addition' and 'multiplication'.
1692  template< typename T1, typename T2, typename T3, typename T4 >
1693  struct UseVectorizedDefaultKernel {
1694  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1695  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1696  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1697  IsSame<typename T1::ElementType,T4>::value &&
1698  IntrinsicTrait<typename T1::ElementType>::addition &&
1699  IntrinsicTrait<typename T1::ElementType>::multiplication };
1700  };
1701  //**********************************************************************************************
1702 
1703  public:
1704  //**Type definitions****************************************************************************
// Public type definitions of the scaled matrix/vector multiplication expression.
// This: type of this expression specialization.
1705  typedef DVecScalarMultExpr<MVM,ST,false> This;
// ResultType: MultTrait of the multiplication result type RES and the scalar type ST.
1706  typedef typename MultTrait<RES,ST>::Type ResultType;
// TransposeType / ElementType: taken from ResultType.
1707  typedef typename ResultType::TransposeType TransposeType;
1708  typedef typename ResultType::ElementType ElementType;
// IntrinsicType: intrinsic type associated with ElementType via IntrinsicTrait.
1709  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// ReturnType: type returned by operator[] (a const element value).
1710  typedef const ElementType ReturnType;
// CompositeType: a const ResultType.
1711  typedef const ResultType CompositeType;
1712 
// LeftOperand: type of the stored multiplication expression (held by value as vector_).
1714  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
1715 
// RightOperand: type of the stored scalar (scalar_).
1717  typedef ST RightOperand;
1718 
// LT: type used to evaluate the matrix operand; 'const MRT' if evaluateMatrix is set,
// otherwise MCT.
1720  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
1721 
// RT: type used to evaluate the vector operand; 'const VRT' if evaluateVector is set,
// otherwise VCT.
1723  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
1724  //**********************************************************************************************
1725 
1726  //**Compilation flags***************************************************************************
// Compilation flag 'vectorizable', fixed to 0 for this expression type.
// NOTE(review): presumably signals that the expression itself offers no intrinsic
// element access - confirm against the users of this flag.
1728  enum { vectorizable = 0 };
1729 
// Compilation flag 'smpAssignable', fixed to 0 for this expression type.
// NOTE(review): presumably excludes the expression from shared-memory parallel assignment -
// confirm against the users of this flag.
1731  enum { smpAssignable = 0 };
1732  //**********************************************************************************************
1733 
1734  //**Constructor*********************************************************************************
// Constructor: stores the multiplication expression and the scalar factor.
//   vector  The matrix/vector multiplication expression to be scaled.
//   scalar  The scalar factor.
1740  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1741  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1742  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1743  {}
1744  //**********************************************************************************************
1745 
1746  //**Subscript operator**************************************************************************
// Subscript operator: returns element 'index' of the multiplication expression multiplied by
// the scalar. The index is checked against vector_.size() by an internal assertion only.
1752  inline ReturnType operator[]( size_t index ) const {
1753  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1754  return vector_[index] * scalar_;
1755  }
1756  //**********************************************************************************************
1757 
1758  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped multiplication
// expression.
1763  inline size_t size() const {
1764  return vector_.size();
1765  }
1766  //**********************************************************************************************
1767 
1768  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix/vector multiplication
// expression.
1773  inline LeftOperand leftOperand() const {
1774  return vector_;
1775  }
1776  //**********************************************************************************************
1777 
1778  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar factor.
1783  inline RightOperand rightOperand() const {
1784  return scalar_;
1785  }
1786  //**********************************************************************************************
1787 
1788  //**********************************************************************************************
// Alias query: forwards the given address to canAlias() of the wrapped multiplication
// expression and returns its result. The scalar takes no part in the check.
1794  template< typename T >
1795  inline bool canAlias( const T* alias ) const {
1796  return vector_.canAlias( alias );
1797  }
1798  //**********************************************************************************************
1799 
1800  //**********************************************************************************************
// Alias query: forwards the given address to isAliased() of the wrapped multiplication
// expression and returns its result. The scalar takes no part in the check.
1806  template< typename T >
1807  inline bool isAliased( const T* alias ) const {
1808  return vector_.isAliased( alias );
1809  }
1810  //**********************************************************************************************
1811 
1812  private:
1813  //**Member variables****************************************************************************
// Operands of the scaled expression: the matrix/vector multiplication expression and the
// scalar factor, both set by the constructor.
1814  LeftOperand vector_;
1815  RightOperand scalar_;
1816  //**********************************************************************************************
1817 
1818  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled transpose dense matrix-dense vector product to a dense vector
// (lhs = scalar * A * x). The matrix and vector operands are taken from the wrapped
// multiplication expression, evaluated, and passed together with the scalar to either the
// default or the BLAS-based assignment kernel.
//   lhs  Target dense vector; its size must equal rhs.size() (internal assertion).
//   rhs  The scaled multiplication expression to be assigned.
1830  template< typename VT1 > // Type of the target dense vector
1831  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1832  {
1834 
1835  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1836 
1837  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1838  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1839 
      // No rows: nothing to assign. No columns: the target is reset instead of computed.
1840  if( left.rows() == 0UL ) {
1841  return;
1842  }
1843  else if( left.columns() == 0UL ) {
1844  reset( ~lhs );
1845  return;
1846  }
1847 
1848  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1849  RT x( right ); // Evaluation of the right-hand side dense vector operand
1850 
1851  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1852  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1853  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1854  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1855 
      // The default kernel is used when the matrix operand is a computation that was not
      // evaluated, or when the element count is below TDMATDVECMULT_THRESHOLD; otherwise the
      // BLAS kernel selection is used.
1856  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1857  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1858  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1859  else
1860  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1861  }
1862  //**********************************************************************************************
1863 
1864  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for the assignment y = scalar * A * x.
// Enabled (via DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold.
// The first column initializes y, the remaining columns are accumulated with a row loop
// unrolled by two, and finally every element of y is multiplied by the scalar.
// Column 0 is accessed unconditionally; the visible caller assign() returns early when the
// matrix has no columns.
//   y       Target vector, overwritten.
//   A       Matrix operand, accessed through A(i,j).
//   x       Vector operand, accessed through x[j].
//   scalar  Scaling factor applied to the product.
1878  template< typename VT1 // Type of the left-hand side target vector
1879  , typename MT1 // Type of the left-hand side matrix operand
1880  , typename VT2 // Type of the right-hand side vector operand
1881  , typename ST2 > // Type of the scalar value
1882  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1883  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1884  {
1885  const size_t M( A.rows() );
1886  const size_t N( A.columns() );
1887 
      // iend is M rounded down to the next even number (clears the lowest bit).
1888  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1889  const size_t iend( M & size_t(-2) );
1890 
      // Initialize y with the contribution of the first column.
1891  for( size_t i=0UL; i<M; ++i ) {
1892  y[i] = x[0UL] * A(i,0UL);
1893  }
      // Accumulate the contributions of the remaining columns.
1894  for( size_t j=1UL; j<N; ++j ) {
1895  for( size_t i=0UL; i<iend; i+=2UL ) {
1896  y[i ] += x[j] * A(i ,j);
1897  y[i+1UL] += x[j] * A(i+1UL,j);
1898  }
1899  if( iend < M ) {
1900  y[iend] += x[j] * A(iend,j);
1901  }
1902  }
      // Apply the scalar factor to the finished product.
1903  for( size_t i=0UL; i<M; ++i ) {
1904  y[i] *= scalar;
1905  }
1906  }
1907  //**********************************************************************************************
1908 
1909  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the assignment y = scalar * A * x.
// Enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the accumulators sum A.load(i,j) * set( x[j] ) over all
// columns j; the sums are multiplied by the broadcast scalar and stored into y.
// NOTE(review): the accumulators are default-constructed and then added to, so this assumes
// that a default-constructed IntrinsicType holds zero - confirm.
// NOTE(review): each loop only checks that the start of the block's last intrinsic vector is
// below M, so the last load/store of a block can extend past M when M is not a multiple of
// IT::size; this presumably relies on padded storage of y and A - confirm.
//   y       Target vector, written through store().
//   A       Matrix operand, accessed through load(i,j).
//   x       Vector operand, accessed through x[j].
//   scalar  Scaling factor applied to the product.
1923  template< typename VT1 // Type of the left-hand side target vector
1924  , typename MT1 // Type of the left-hand side matrix operand
1925  , typename VT2 // Type of the right-hand side vector operand
1926  , typename ST2 > // Type of the scalar value
1927  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1928  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1929  {
1930  typedef IntrinsicTrait<ElementType> IT;
1931 
1932  const size_t M( A.rows() );
1933  const size_t N( A.columns() );
1934 
      // Scalar factor as intrinsic vector, created once for all blocks.
1935  const IntrinsicType factor( set( scalar ) );
1936 
      // Row index shared by all of the following loops; each loop continues where the
      // previous one stopped.
1937  size_t i( 0UL );
1938 
      // Blocks of eight intrinsic vectors.
1939  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1940  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1941  for( size_t j=0UL; j<N; ++j ) {
1942  const IntrinsicType x1( set( x[j] ) );
1943  xmm1 = xmm1 + A.load(i ,j) * x1;
1944  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1945  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1946  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1947  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1948  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1949  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1950  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1951  }
1952  y.store( i , xmm1*factor );
1953  y.store( i+IT::size , xmm2*factor );
1954  y.store( i+IT::size*2UL, xmm3*factor );
1955  y.store( i+IT::size*3UL, xmm4*factor );
1956  y.store( i+IT::size*4UL, xmm5*factor );
1957  y.store( i+IT::size*5UL, xmm6*factor );
1958  y.store( i+IT::size*6UL, xmm7*factor );
1959  y.store( i+IT::size*7UL, xmm8*factor );
1960  }
      // Blocks of four intrinsic vectors.
1961  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1962  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1963  for( size_t j=0UL; j<N; ++j ) {
1964  const IntrinsicType x1( set( x[j] ) );
1965  xmm1 = xmm1 + A.load(i ,j) * x1;
1966  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1967  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1968  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1969  }
1970  y.store( i , xmm1*factor );
1971  y.store( i+IT::size , xmm2*factor );
1972  y.store( i+IT::size*2UL, xmm3*factor );
1973  y.store( i+IT::size*3UL, xmm4*factor );
1974  }
      // Blocks of three intrinsic vectors.
1975  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1976  IntrinsicType xmm1, xmm2, xmm3;
1977  for( size_t j=0UL; j<N; ++j ) {
1978  const IntrinsicType x1( set( x[j] ) );
1979  xmm1 = xmm1 + A.load(i ,j) * x1;
1980  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1981  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1982  }
1983  y.store( i , xmm1*factor );
1984  y.store( i+IT::size , xmm2*factor );
1985  y.store( i+IT::size*2UL, xmm3*factor );
1986  }
      // Blocks of two intrinsic vectors.
1987  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1988  IntrinsicType xmm1, xmm2;
1989  for( size_t j=0UL; j<N; ++j ) {
1990  const IntrinsicType x1( set( x[j] ) );
1991  xmm1 = xmm1 + A.load(i ,j) * x1;
1992  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1993  }
1994  y.store( i , xmm1*factor );
1995  y.store( i+IT::size, xmm2*factor );
1996  }
      // At most one intrinsic vector is left at this point.
1997  if( i < M ) {
1998  IntrinsicType xmm1;
1999  for( size_t j=0UL; j<N; ++j ) {
2000  const IntrinsicType x1( set( x[j] ) );
2001  xmm1 = xmm1 + A.load(i,j) * x1;
2002  }
2003  y.store( i, xmm1*factor );
2004  }
2005  }
2006  //**********************************************************************************************
2007 
2008  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS-based assignment y = scalar * A * x.
// Enabled (via EnableIf) when UseDefaultKernel<VT1,MT1,VT2,ST2> holds; it simply forwards to
// selectDefaultAssignKernel() with the same arguments.
2022  template< typename VT1 // Type of the left-hand side target vector
2023  , typename MT1 // Type of the left-hand side matrix operand
2024  , typename VT2 // Type of the right-hand side vector operand
2025  , typename ST2 > // Type of the scalar value
2026  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2027  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2028  {
2029  selectDefaultAssignKernel( y, A, x, scalar );
2030  }
2031  //**********************************************************************************************
2032 
2033  //**BLAS-based assignment to dense vectors (single precision)***********************************
2034 #if BLAZE_BLAS_MODE
2035 
// BLAS kernel for the assignment y = scalar * A * x on single precision operands.
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Calls cblas_sgemv in column-major, non-transposed mode with alpha = scalar and beta = 0,
// i.e. the gemv update y := alpha*A*x + beta*y replaces y by the scaled product.
//   y       Target vector; written through y.data().
//   A       Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x       Vector operand; read through x.data().
//   scalar  Scaling factor, passed to BLAS as alpha.
2048  template< typename VT1 // Type of the left-hand side target vector
2049  , typename MT1 // Type of the left-hand side matrix operand
2050  , typename VT2 // Type of the right-hand side vector operand
2051  , typename ST2 > // Type of the scalar value
2052  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2053  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2054  {
2055  using boost::numeric_cast;
2056 
2060 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
2061  const int M ( numeric_cast<int>( A.rows() ) );
2062  const int N ( numeric_cast<int>( A.columns() ) );
2063  const int lda( numeric_cast<int>( A.spacing() ) );
2064 
2065  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2066  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2067  }
2068 #endif
2069  //**********************************************************************************************
2070 
2071  //**BLAS-based assignment to dense vectors (double precision)***********************************
2072 #if BLAZE_BLAS_MODE
2073 
// BLAS kernel for the assignment y = scalar * A * x on double precision operands.
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Calls cblas_dgemv in column-major, non-transposed mode with alpha = scalar and beta = 0,
// i.e. the gemv update y := alpha*A*x + beta*y replaces y by the scaled product.
//   y       Target vector; written through y.data().
//   A       Matrix operand; rows(), columns(), spacing() and data() are handed to BLAS.
//   x       Vector operand; read through x.data().
//   scalar  Scaling factor, passed to BLAS as alpha.
2086  template< typename VT1 // Type of the left-hand side target vector
2087  , typename MT1 // Type of the left-hand side matrix operand
2088  , typename VT2 // Type of the right-hand side vector operand
2089  , typename ST2 > // Type of the scalar value
2090  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2091  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2092  {
2093  using boost::numeric_cast;
2094 
2098 
      // The CBLAS interface takes int dimensions; A.spacing() serves as leading dimension.
2099  const int M ( numeric_cast<int>( A.rows() ) );
2100  const int N ( numeric_cast<int>( A.columns() ) );
2101  const int lda( numeric_cast<int>( A.spacing() ) );
2102 
2103  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2104  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2105  }
2106 #endif
2107  //**********************************************************************************************
2108 
2109  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2110 #if BLAZE_BLAS_MODE
2111 
// BLAS-based assignment kernel for single precision complex operands.
// Selected when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> enables the overload. The
// constraints below require the value_type of all three element types to be float. The
// scalar is converted to complex<float> (alpha), beta is (0,0), and both are passed by
// address to cblas_cgemv (column-major, no transpose): y = scalar * A * x.
2124  template< typename VT1 // Type of the left-hand side target vector
2125  , typename MT1 // Type of the left-hand side matrix operand
2126  , typename VT2 // Type of the right-hand side vector operand
2127  , typename ST2 > // Type of the scalar value
2128  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2129  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2130  {
2131  using boost::numeric_cast;
2132 
2136  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2137  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2138  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2139 
2140  const int M ( numeric_cast<int>( A.rows() ) );
2141  const int N ( numeric_cast<int>( A.columns() ) );
2142  const int lda( numeric_cast<int>( A.spacing() ) );
2143  const complex<float> alpha( scalar );
2144  const complex<float> beta ( 0.0F, 0.0F );
2145 
2146  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2147  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2148  }
2149 #endif
2150  //**********************************************************************************************
2151 
2152  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2153 #if BLAZE_BLAS_MODE
2154 
// BLAS-based assignment kernel for double precision complex operands.
// Selected when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> enables the overload. The
// constraints below require the value_type of all three element types to be double. The
// scalar is converted to complex<double> (alpha), beta is (0,0), and both are passed by
// address to cblas_zgemv (column-major, no transpose): y = scalar * A * x.
2167  template< typename VT1 // Type of the left-hand side target vector
2168  , typename MT1 // Type of the left-hand side matrix operand
2169  , typename VT2 // Type of the right-hand side vector operand
2170  , typename ST2 > // Type of the scalar value
2171  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2172  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2173  {
2174  using boost::numeric_cast;
2175 
2179  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2180  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2181  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2182 
2183  const int M ( numeric_cast<int>( A.rows() ) );
2184  const int N ( numeric_cast<int>( A.columns() ) );
2185  const int lda( numeric_cast<int>( A.spacing() ) );
2186  const complex<double> alpha( scalar );
2187  const complex<double> beta ( 0.0, 0.0 );
2188 
2189  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2190  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2191  }
2192 #endif
2193  //**********************************************************************************************
2194 
2195  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product to a sparse column vector.
// The expression is first evaluated into a temporary of type ResultType, and that temporary
// is then assigned to the sparse target; no specialized sparse kernel is used here.
// NOTE(review): listing lines 2210 and 2212-2214 are absent from this extract.
2207  template< typename VT1 > // Type of the target sparse vector
2208  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2209  {
2211 
2215 
2216  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2217 
2218  const ResultType tmp( rhs );
2219  assign( ~lhs, tmp );
2220  }
2221  //**********************************************************************************************
2222 
2223  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product to a dense column vector.
// Steps, as visible below:
//   1. fetch the matrix and vector operands from the wrapped expression (rhs.vector_);
//   2. return without touching lhs if the matrix has zero rows or zero columns;
//   3. evaluate both operands into A (type LT) and x (type RT);
//   4. dispatch to the default kernel when MT is a computation that is not evaluated
//      (IsComputation<MT>::value && !evaluateMatrix) or when rows*columns is below
//      TDMATDVECMULT_THRESHOLD; otherwise dispatch to the BLAS kernel.
2235  template< typename VT1 > // Type of the target dense vector
2236  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2237  {
2239 
2240  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2241 
2242  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2243  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2244 
// Nothing to add for an empty matrix.
2245  if( left.rows() == 0UL || left.columns() == 0UL ) {
2246  return;
2247  }
2248 
2249  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2250  RT x( right ); // Evaluation of the right-hand side dense vector operand
2251 
2252  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2253  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2254  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2255  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2256 
// Kernel selection: small or unevaluated-computation operands take the default kernel.
2257  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2258  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2259  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2260  else
2261  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2262  }
2263  //**********************************************************************************************
2264 
2265  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel.
// This overload is the counterpart of the vectorized one: it is wrapped in DisableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. It delegates the whole operation to the
// target vector by calling y.addAssign() with the expression A * x * scalar.
2279  template< typename VT1 // Type of the left-hand side target vector
2280  , typename MT1 // Type of the left-hand side matrix operand
2281  , typename VT2 // Type of the right-hand side vector operand
2282  , typename ST2 > // Type of the scalar value
2283  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2284  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2285  {
2286  y.addAssign( A * x * scalar );
2287  }
2288  //**********************************************************************************************
2289 
2290  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel: y += (A * x) * scalar.
// Enabled via UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. The rows of A are walked in
// chunks of IT::size elements; the outer loops are unrolled to handle 8, 4, 3, 2 and finally
// 1 chunk per pass. For every chunk an accumulator sums A.load(i,j) * set(x[j]) over all
// columns j, and the accumulated value times `factor` is added to the matching chunk of y.
// NOTE(review): the xmm accumulators are default-constructed and immediately used on the
// right-hand side of `xmm = xmm + ...`; this presumably relies on IntrinsicType's default
// constructor yielding zero — confirm.
// NOTE(review): the last chunk is loaded/stored at full IT::size width even when fewer than
// IT::size rows remain; presumably the storage of A and y is padded accordingly — verify.
2304  template< typename VT1 // Type of the left-hand side target vector
2305  , typename MT1 // Type of the left-hand side matrix operand
2306  , typename VT2 // Type of the right-hand side vector operand
2307  , typename ST2 > // Type of the scalar value
2308  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2309  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2310  {
2311  typedef IntrinsicTrait<ElementType> IT;
2312 
2313  const size_t M( A.rows() );
2314  const size_t N( A.columns() );
2315 
// The scalar is broadcast once and reused for every chunk.
2316  const IntrinsicType factor( set( scalar ) );
2317 
2318  size_t i( 0UL );
2319 
// Eight chunks per pass: runs while the eighth chunk still starts before row M.
2320  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2321  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2322  for( size_t j=0UL; j<N; ++j ) {
2323  const IntrinsicType x1( set( x[j] ) );
2324  xmm1 = xmm1 + A.load(i ,j) * x1;
2325  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2326  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2327  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2328  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2329  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2330  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2331  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2332  }
2333  y.store( i , y.load(i ) + xmm1*factor );
2334  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2335  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2336  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2337  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
2338  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
2339  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
2340  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
2341  }
// Four chunks per pass.
2342  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2343  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2344  for( size_t j=0UL; j<N; ++j ) {
2345  const IntrinsicType x1( set( x[j] ) );
2346  xmm1 = xmm1 + A.load(i ,j) * x1;
2347  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2348  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2349  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2350  }
2351  y.store( i , y.load(i ) + xmm1*factor );
2352  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2353  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2354  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2355  }
// Three chunks per pass.
2356  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2357  IntrinsicType xmm1, xmm2, xmm3;
2358  for( size_t j=0UL; j<N; ++j ) {
2359  const IntrinsicType x1( set( x[j] ) );
2360  xmm1 = xmm1 + A.load(i ,j) * x1;
2361  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2362  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2363  }
2364  y.store( i , y.load(i ) + xmm1*factor );
2365  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2366  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2367  }
// Two chunks per pass.
2368  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2369  IntrinsicType xmm1, xmm2;
2370  for( size_t j=0UL; j<N; ++j ) {
2371  const IntrinsicType x1( set( x[j] ) );
2372  xmm1 = xmm1 + A.load(i ,j) * x1;
2373  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2374  }
2375  y.store( i , y.load(i ) + xmm1*factor );
2376  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
2377  }
// At most one chunk can remain after the two-chunk loop.
2378  if( i < M ) {
2379  IntrinsicType xmm1;
2380  for( size_t j=0UL; j<N; ++j ) {
2381  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2382  }
2383  y.store( i, y.load(i) + xmm1*factor );
2384  }
2385  }
2386  //**********************************************************************************************
2387 
2388  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback for the BLAS-based addition assignment.
// Enabled via UseDefaultKernel<VT1,MT1,VT2,ST2>; it performs no BLAS call and simply
// forwards all arguments to selectDefaultAddAssignKernel().
2402  template< typename VT1 // Type of the left-hand side target vector
2403  , typename MT1 // Type of the left-hand side matrix operand
2404  , typename VT2 // Type of the right-hand side vector operand
2405  , typename ST2 > // Type of the scalar value
2406  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2407  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2408  {
2409  selectDefaultAddAssignKernel( y, A, x, scalar );
2410  }
2411  //**********************************************************************************************
2412 
2413  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2414 #if BLAZE_BLAS_MODE
2415 
// BLAS-based addition assignment kernel for single precision operands.
// Enabled via UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>. Calls cblas_sgemv (column-major,
// no transpose) with alpha = scalar and beta = 1.0F, so the product is accumulated onto the
// existing contents of y: y += scalar * A * x.
2428  template< typename VT1 // Type of the left-hand side target vector
2429  , typename MT1 // Type of the left-hand side matrix operand
2430  , typename VT2 // Type of the right-hand side vector operand
2431  , typename ST2 > // Type of the scalar value
2432  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2433  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2434  {
2435  using boost::numeric_cast;
2436 
2440 
2441  const int M ( numeric_cast<int>( A.rows() ) );
2442  const int N ( numeric_cast<int>( A.columns() ) );
2443  const int lda( numeric_cast<int>( A.spacing() ) );
2444 
2445  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2446  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2447  }
2448 #endif
2449  //**********************************************************************************************
2450 
2451  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2452 #if BLAZE_BLAS_MODE
2453 
// BLAS-based addition assignment kernel for double precision operands.
// Enabled via UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>. Calls cblas_dgemv (column-major,
// no transpose) with alpha = scalar and beta = 1.0, so the product is accumulated onto the
// existing contents of y: y += scalar * A * x.
2466  template< typename VT1 // Type of the left-hand side target vector
2467  , typename MT1 // Type of the left-hand side matrix operand
2468  , typename VT2 // Type of the right-hand side vector operand
2469  , typename ST2 > // Type of the scalar value
2470  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2471  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2472  {
2473  using boost::numeric_cast;
2474 
2478 
2479  const int M ( numeric_cast<int>( A.rows() ) );
2480  const int N ( numeric_cast<int>( A.columns() ) );
2481  const int lda( numeric_cast<int>( A.spacing() ) );
2482 
2483  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2484  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2485  }
2486 #endif
2487  //**********************************************************************************************
2488 
2489  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2490 #if BLAZE_BLAS_MODE
2491 
// BLAS-based addition assignment kernel for single precision complex operands.
// Enabled via UseSinglePrecisionComplexKernel<VT1,MT1,VT2>; the constraints below require
// the value_type of all three element types to be float. Calls cblas_cgemv (column-major,
// no transpose) with alpha = complex<float>(scalar) and beta = (1,0): y += scalar * A * x.
2504  template< typename VT1 // Type of the left-hand side target vector
2505  , typename MT1 // Type of the left-hand side matrix operand
2506  , typename VT2 // Type of the right-hand side vector operand
2507  , typename ST2 > // Type of the scalar value
2508  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2509  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2510  {
2511  using boost::numeric_cast;
2512 
2516  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2517  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2518  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2519 
2520  const int M ( numeric_cast<int>( A.rows() ) );
2521  const int N ( numeric_cast<int>( A.columns() ) );
2522  const int lda( numeric_cast<int>( A.spacing() ) );
2523  const complex<float> alpha( scalar );
2524  const complex<float> beta ( 1.0F, 0.0F );
2525 
2526  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2527  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2528  }
2529 #endif
2530  //**********************************************************************************************
2531 
2532  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2533 #if BLAZE_BLAS_MODE
2534 
// BLAS-based addition assignment kernel for double precision complex operands.
// Enabled via UseDoublePrecisionComplexKernel<VT1,MT1,VT2>; the constraints below require
// the value_type of all three element types to be double. Calls cblas_zgemv (column-major,
// no transpose) with alpha = complex<double>(scalar) and beta = (1,0): y += scalar * A * x.
2547  template< typename VT1 // Type of the left-hand side target vector
2548  , typename MT1 // Type of the left-hand side matrix operand
2549  , typename VT2 // Type of the right-hand side vector operand
2550  , typename ST2 > // Type of the scalar value
2551  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2552  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2553  {
2554  using boost::numeric_cast;
2555 
2559  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2560  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2561  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2562 
2563  const int M ( numeric_cast<int>( A.rows() ) );
2564  const int N ( numeric_cast<int>( A.columns() ) );
2565  const int lda( numeric_cast<int>( A.spacing() ) );
2566  const complex<double> alpha( scalar );
2567  const complex<double> beta ( 1.0, 0.0 );
2568 
2569  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2570  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2571  }
2572 #endif
2573  //**********************************************************************************************
2574 
2575  //**Addition assignment to sparse vectors*******************************************************
2576  // No special implementation for the addition assignment to sparse vectors.
2577  //**********************************************************************************************
2578 
2579  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product to a dense column vector.
// Mirrors the structure of addAssign(): the operands are taken from rhs.vector_, an empty
// matrix (zero rows or zero columns) causes an early return, both operands are evaluated
// into A (type LT) and x (type RT), and the kernel is chosen as follows: the default kernel
// when MT is a computation that is not evaluated or when rows*columns is below
// TDMATDVECMULT_THRESHOLD, the BLAS kernel otherwise.
2591  template< typename VT1 > // Type of the target dense vector
2592  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2593  {
2595 
2596  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2597 
2598  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2599  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2600 
// Nothing to subtract for an empty matrix.
2601  if( left.rows() == 0UL || left.columns() == 0UL ) {
2602  return;
2603  }
2604 
2605  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2606  RT x( right ); // Evaluation of the right-hand side dense vector operand
2607 
2608  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2609  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2610  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2611  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2612 
// Kernel selection: small or unevaluated-computation operands take the default kernel.
2613  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2614  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2615  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2616  else
2617  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2618  }
2619  //**********************************************************************************************
2620 
2621  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel.
// This overload is the counterpart of the vectorized one: it is wrapped in DisableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. It delegates the whole operation to the
// target vector by calling y.subAssign() with the expression A * x * scalar.
2635  template< typename VT1 // Type of the left-hand side target vector
2636  , typename MT1 // Type of the left-hand side matrix operand
2637  , typename VT2 // Type of the right-hand side vector operand
2638  , typename ST2 > // Type of the scalar value
2639  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2640  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2641  {
2642  y.subAssign( A * x * scalar );
2643  }
2644  //**********************************************************************************************
2645 
2646  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel: y -= (A * x) * scalar.
// Enabled via UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. The rows of A are walked in
// chunks of IT::size elements; the outer loops are unrolled to handle 8, 4, 3, 2 and finally
// 1 chunk per pass. For every chunk an accumulator sums A.load(i,j) * set(x[j]) over all
// columns j, and the accumulated value times `factor` is subtracted from the matching chunk
// of y.
// NOTE(review): the xmm accumulators are default-constructed and immediately used on the
// right-hand side of `xmm = xmm + ...`; this presumably relies on IntrinsicType's default
// constructor yielding zero — confirm.
// NOTE(review): the last chunk is loaded/stored at full IT::size width even when fewer than
// IT::size rows remain; presumably the storage of A and y is padded accordingly — verify.
2660  template< typename VT1 // Type of the left-hand side target vector
2661  , typename MT1 // Type of the left-hand side matrix operand
2662  , typename VT2 // Type of the right-hand side vector operand
2663  , typename ST2 > // Type of the scalar value
2664  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2665  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2666  {
2667  typedef IntrinsicTrait<ElementType> IT;
2668 
2669  const size_t M( A.rows() );
2670  const size_t N( A.columns() );
2671 
// The scalar is broadcast once and reused for every chunk.
2672  const IntrinsicType factor( set( scalar ) );
2673 
2674  size_t i( 0UL );
2675 
// Eight chunks per pass: runs while the eighth chunk still starts before row M.
2676  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2677  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2678  for( size_t j=0UL; j<N; ++j ) {
2679  const IntrinsicType x1( set( x[j] ) );
2680  xmm1 = xmm1 + A.load(i ,j) * x1;
2681  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2682  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2683  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2684  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2685  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2686  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2687  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2688  }
2689  y.store( i , y.load(i ) - xmm1*factor );
2690  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
2691  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
2692  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
2693  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
2694  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
2695  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
2696  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
2697  }
// Four chunks per pass.
2698  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2699  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2700  for( size_t j=0UL; j<N; ++j ) {
2701  const IntrinsicType x1( set( x[j] ) );
2702  xmm1 = xmm1 + A.load(i ,j) * x1;
2703  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2704  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2705  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2706  }
2707  y.store( i , y.load(i ) - xmm1*factor );
2708  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
2709  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
2710  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
2711  }
// Three chunks per pass.
2712  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2713  IntrinsicType xmm1, xmm2, xmm3;
2714  for( size_t j=0UL; j<N; ++j ) {
2715  const IntrinsicType x1( set( x[j] ) );
2716  xmm1 = xmm1 + A.load(i ,j) * x1;
2717  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2718  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2719  }
2720  y.store( i , y.load(i ) - xmm1*factor );
2721  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
2722  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
2723  }
// Two chunks per pass.
2724  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2725  IntrinsicType xmm1, xmm2;
2726  for( size_t j=0UL; j<N; ++j ) {
2727  const IntrinsicType x1( set( x[j] ) );
2728  xmm1 = xmm1 + A.load(i ,j) * x1;
2729  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2730  }
2731  y.store( i , y.load(i ) - xmm1*factor );
2732  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
2733  }
// At most one chunk can remain after the two-chunk loop.
2734  if( i < M ) {
2735  IntrinsicType xmm1;
2736  for( size_t j=0UL; j<N; ++j ) {
2737  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2738  }
2739  y.store( i, y.load(i) - xmm1*factor );
2740  }
2741  }
2742  //**********************************************************************************************
2743 
2744  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback for the BLAS-based subtraction assignment.
// Enabled via UseDefaultKernel<VT1,MT1,VT2,ST2>; it performs no BLAS call and simply
// forwards all arguments to selectDefaultSubAssignKernel().
2758  template< typename VT1 // Type of the left-hand side target vector
2759  , typename MT1 // Type of the left-hand side matrix operand
2760  , typename VT2 // Type of the right-hand side vector operand
2761  , typename ST2 > // Type of the scalar value
2762  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2763  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2764  {
2765  selectDefaultSubAssignKernel( y, A, x, scalar );
2766  }
2767  //**********************************************************************************************
2768 
2769  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2770 #if BLAZE_BLAS_MODE
2771 
// BLAS-based subtraction assignment kernel for single precision operands.
// Enabled via UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>. The subtraction is expressed as an
// accumulation with a negated factor: cblas_sgemv is called with alpha = -scalar and
// beta = 1.0F, giving y -= scalar * A * x.
2784  template< typename VT1 // Type of the left-hand side target vector
2785  , typename MT1 // Type of the left-hand side matrix operand
2786  , typename VT2 // Type of the right-hand side vector operand
2787  , typename ST2 > // Type of the scalar value
2788  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2789  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2790  {
2791  using boost::numeric_cast;
2792 
2796 
2797  const int M ( numeric_cast<int>( A.rows() ) );
2798  const int N ( numeric_cast<int>( A.columns() ) );
2799  const int lda( numeric_cast<int>( A.spacing() ) );
2800 
2801  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2802  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2803  }
2804 #endif
2805  //**********************************************************************************************
2806 
2807  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2808 #if BLAZE_BLAS_MODE
2809 
// BLAS-based subtraction assignment kernel for double precision operands.
// Enabled via UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>. The subtraction is expressed as an
// accumulation with a negated factor: cblas_dgemv is called with alpha = -scalar and
// beta = 1.0, giving y -= scalar * A * x.
2822  template< typename VT1 // Type of the left-hand side target vector
2823  , typename MT1 // Type of the left-hand side matrix operand
2824  , typename VT2 // Type of the right-hand side vector operand
2825  , typename ST2 > // Type of the scalar value
2826  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2827  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2828  {
2829  using boost::numeric_cast;
2830 
2834 
2835  const int M ( numeric_cast<int>( A.rows() ) );
2836  const int N ( numeric_cast<int>( A.columns() ) );
2837  const int lda( numeric_cast<int>( A.spacing() ) );
2838 
2839  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
2840  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2841  }
2842 #endif
2843  //**********************************************************************************************
2844 
2845  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2846 #if BLAZE_BLAS_MODE
2847 
// BLAS-based subtraction assignment kernel for single precision complex operands.
// Enabled via UseSinglePrecisionComplexKernel<VT1,MT1,VT2>; the constraints below require
// the value_type of all three element types to be float. cblas_cgemv is called with
// alpha = complex<float>(-scalar) and beta = (1,0), giving y -= scalar * A * x.
2860  template< typename VT1 // Type of the left-hand side target vector
2861  , typename MT1 // Type of the left-hand side matrix operand
2862  , typename VT2 // Type of the right-hand side vector operand
2863  , typename ST2 > // Type of the scalar value
2864  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2865  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2866  {
2867  using boost::numeric_cast;
2868 
2872  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2873  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2874  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2875 
2876  const int M ( numeric_cast<int>( A.rows() ) );
2877  const int N ( numeric_cast<int>( A.columns() ) );
2878  const int lda( numeric_cast<int>( A.spacing() ) );
2879  const complex<float> alpha( -scalar );
2880  const complex<float> beta ( 1.0F, 0.0F );
2881 
2882  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2883  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2884  }
2885 #endif
2886  //**********************************************************************************************
2887 
2888  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2889 #if BLAZE_BLAS_MODE
2890 
// BLAS-based subtraction assignment kernel for double precision complex operands.
// Enabled via UseDoublePrecisionComplexKernel<VT1,MT1,VT2>; the constraints below require
// the value_type of all three element types to be double. cblas_zgemv is called with
// alpha = complex<double>(-scalar) and beta = (1,0), giving y -= scalar * A * x.
2903  template< typename VT1 // Type of the left-hand side target vector
2904  , typename MT1 // Type of the left-hand side matrix operand
2905  , typename VT2 // Type of the right-hand side vector operand
2906  , typename ST2 > // Type of the scalar value
2907  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2908  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2909  {
2910  using boost::numeric_cast;
2911 
2915  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2916  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2917  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2918 
2919  const int M ( numeric_cast<int>( A.rows() ) );
2920  const int N ( numeric_cast<int>( A.columns() ) );
2921  const int lda( numeric_cast<int>( A.spacing() ) );
2922  const complex<double> alpha( -scalar );
2923  const complex<double> beta ( 1.0, 0.0 );
2924 
2925  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2926  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2927  }
2928 #endif
2929  //**********************************************************************************************
2930 
2931  //**Subtraction assignment to sparse vectors****************************************************
2932  // No special implementation for the subtraction assignment to sparse vectors.
2933  //**********************************************************************************************
2934 
2935  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled matrix/vector product to a dense column vector.
// The expression is first evaluated into a temporary of type ResultType, which is then
// passed to multAssign() together with the target vector.
// NOTE(review): listing lines 2950 and 2952-2954 are absent from this extract.
2947  template< typename VT1 > // Type of the target dense vector
2948  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2949  {
2951 
2955 
2956  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2957 
2958  const ResultType tmp( rhs );
2959  multAssign( ~lhs, tmp );
2960  }
2961  //**********************************************************************************************
2962 
2963  //**Multiplication assignment to sparse vectors*************************************************
2964  // No special implementation for the multiplication assignment to sparse vectors.
2965  //**********************************************************************************************
2966 
2967  //**Compile time checks*************************************************************************
2976  //**********************************************************************************************
2977 };
2979 //*************************************************************************************************
2980 
2981 
2982 
2983 
2984 //=================================================================================================
2985 //
2986 // GLOBAL BINARY ARITHMETIC OPERATORS
2987 //
2988 //=================================================================================================
2989 
2990 //*************************************************************************************************
// Multiplication operator creating a TDMatDVecMultExpr from a dense matrix and a dense vector.
// Throws std::invalid_argument ("Matrix and vector sizes do not match") when the number of
// matrix columns differs from the vector size; otherwise returns the expression object built
// from ~mat and ~vec. The return type is wrapped in DisableIf< IsMatMatMultExpr<T1>, ... >,
// which presumably removes this overload when T1 is a matrix/matrix multiplication
// expression — confirm against the DisableIf definition.
// NOTE(review): listing line 3024 (the operator name and the parameter list declaring `mat`
// and `vec`) is absent from this extract; restore it from the original header.
3021 template< typename T1 // Type of the left-hand side dense matrix
3022  , typename T2 > // Type of the right-hand side dense vector
3023 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
3025 {
3027 
// Size check is performed at run time and reported via exception, not via assertion.
3028  if( (~mat).columns() != (~vec).size() )
3029  throw std::invalid_argument( "Matrix and vector sizes do not match" );
3030 
3031  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3032 }
3033 //*************************************************************************************************
3034 
3035 
3036 
3037 
3038 //=================================================================================================
3039 //
3040 // EXPRESSION TRAIT SPECIALIZATIONS
3041 //
3042 //=================================================================================================
3043 
3044 //*************************************************************************************************
// SubvectorExprTrait specialization for TDMatDVecMultExpr<MT,VT>.
// The nested Type is defined as the MultExprTrait result of multiplying the submatrix
// expression type of `const MT` (SubmatrixExprTrait<const MT>::Type) with the unchanged
// vector type VT.
3046 template< typename MT, typename VT >
3047 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT> >
3048 {
3049  public:
3050  //**********************************************************************************************
3051  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT>::Type, VT >::Type Type;
3052  //**********************************************************************************************
3053 };
3055 //*************************************************************************************************
3056 
3057 } // namespace blaze
3058 
3059 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices (\(A = B * C\)).
Definition: DMatDMatMultExpr.h:3703
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:242
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:109
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:236
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:262
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
Header file for the DenseVector base class.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:229
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:355
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:122
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:250
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:239
Header file for the multiplication trait.
Header file for the IsDouble type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:356
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:108
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:314
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDMatDVecMultExpr.h:357
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:232
Constraints on the storage order of matrix types.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:233
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:245
Header file for the EnableIf class template.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:304
Header file for the IsNumeric type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:348
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:110
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:228
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:231
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:68
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:336
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:111
Header file for all intrinsic functionality.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:230
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:277
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:227
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
Header file for basic type definitions.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:113
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the MatVecMultExpr base class.
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:324
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:112
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.