All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
67 #include <blaze/system/BLAS.h>
69 #include <blaze/util/Assert.h>
70 #include <blaze/util/Complex.h>
76 #include <blaze/util/DisableIf.h>
77 #include <blaze/util/EnableIf.h>
79 #include <blaze/util/SelectType.h>
80 #include <blaze/util/Types.h>
86 
87 
88 namespace blaze {
89 
90 //=================================================================================================
91 //
92 // CLASS TDMATDVECMULTEXPR
93 //
94 //=================================================================================================
95 
96 //*************************************************************************************************
103 template< typename MT // Type of the left-hand side dense matrix
104  , typename VT > // Type of the right-hand side dense vector
105 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
106  , private MatVecMultExpr
107  , private Computation
108 {
109  private:
110  //**Type definitions****************************************************************************
// Private shorthand aliases derived from the two operand types MT (matrix) and VT (vector).
111  typedef typename MT::ResultType MRT; // Result type of the matrix operand
112  typedef typename VT::ResultType VRT; // Result type of the vector operand
113  typedef typename MRT::ElementType MET; // Element type of the matrix result type
114  typedef typename VRT::ElementType VET; // Element type of the vector result type
115  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
116  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
// Compile-time switch consulted by UseSMPAssignKernel, smpAssignable, canSMPAssign() and the
// select*Kernel functions of this class.
// NOTE(review): the expression continues on source line 122, which this listing omits; only the
// first two conjuncts (MT is a computation, MET and VET are the same type) are visible here.
121  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
123  //**********************************************************************************************
124 
125  //**********************************************************************************************
// Compile-time switch: set when VT is a computation or when RequiresEvaluation<VT> reports true.
127  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
132 
// Dispatch helper used with EnableIf/DisableIf on the selectAssignKernel and
// selectAddAssignKernel overloads. Its value is set when either operand switch
// (evaluateMatrix, evaluateVector) is set; T1, T2 and T3 take no part in the condition.
135  template< typename T1, typename T2, typename T3 >
136  struct UseSMPAssignKernel {
137  enum { value = evaluateMatrix || evaluateVector };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
// Dispatch helper guarding the overloads of selectBlasAssignKernel / selectBlasAddAssignKernel
// that call cblas_sgemv.
// NOTE(review): source lines 151-153 are omitted by this listing, so only the requirement that
// all three types are vectorizable is visible; the remaining conditions must be read from the header.
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
// Dispatch helper guarding the overloads of selectBlasAssignKernel / selectBlasAddAssignKernel
// that call cblas_dgemv.
// NOTE(review): source lines 167-169 are omitted by this listing, so only the requirement that
// all three types are vectorizable is visible; the remaining conditions must be read from the header.
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
// Dispatch helper guarding the overloads of selectBlasAssignKernel / selectBlasAddAssignKernel
// that call cblas_cgemv. The nested Type names complex<float>.
// NOTE(review): source lines 184-186 are omitted by this listing, so only the requirement that
// all three types are vectorizable is visible; the remaining conditions must be read from the header.
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
// Dispatch helper guarding the overloads of selectBlasAssignKernel that call cblas_zgemv.
// The nested Type names complex<double>.
// NOTE(review): source lines 201-203 are omitted by this listing, so only the requirement that
// all three types are vectorizable is visible; the remaining conditions must be read from the header.
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
// Dispatch helper guarding the fallback overloads of selectBlasAssignKernel and
// selectBlasAddAssignKernel. Its value is set when BLAZE_BLAS_MODE is off, or when none of the
// four BLAS helpers (single, double, single complex, double complex) is set for T1, T2, T3.
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
// Dispatch helper that chooses between the scalar and the intrinsics-based overloads of
// selectDefaultAssignKernel / selectDefaultAddAssignKernel.
// NOTE(review): source lines 232-235 are omitted by this listing, so only the requirement that
// all three types are vectorizable is visible; the remaining conditions must be read from the header.
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are declared on source lines 242-246, which this
// listing omits.
247  typedef const ElementType ReturnType; // Type returned by operator[]
248  typedef const ResultType CompositeType; // Const-qualified ResultType
249 
// Type of the stored matrix operand: chosen between 'const MT' and 'const MT&' depending on
// IsExpression<MT> (presumably by value for expressions, by reference otherwise — confirm
// against the SelectType documentation).
251  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
252 
// Type of the stored vector operand: the same selection, applied to VT.
254  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
// Compile-time flags of the expression.
// NOTE(review): the vectorizable expression continues on source lines 266-268, which this
// listing omits; only the requirement that MT and VT are vectorizable is visible.
265  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
269 
// smpAssignable is set only when neither operand switch (evaluateMatrix, evaluateVector) is set.
271  enum { smpAssignable = !evaluateMatrix && !evaluateVector };
272  //**********************************************************************************************
273 
274  //**Constructor*********************************************************************************
// Constructs the multiplication expression from the matrix operand 'mat' and the vector
// operand 'vec'.
// Besides storing both operands, the constructor precomputes end_, the stop index of the
// two-columns-per-step loop in operator[]: (columns-1) with its lowest bit cleared, plus one.
// For a matrix with zero columns the unsigned subtraction wraps around; operator[] tests for
// zero columns before it touches end_.
// Matching sizes are only checked through BLAZE_INTERNAL_ASSERT.
280  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
281  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
282  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
283  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
284  {
285  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
286  }
287  //**********************************************************************************************
288 
289  //**Subscript operator**************************************************************************
// Computes element 'index' of the product on demand: the sum over all columns j of
// mat_(index,j) * vec_[j].
// Column 0 initialises the accumulator, the loop then consumes two columns per iteration
// starting at column 1 and stopping at end_, and one trailing column is added when end_ is
// still below the column count. For a matrix without columns the result is reset() instead.
// The index is only checked through BLAZE_INTERNAL_ASSERT.
295  inline ReturnType operator[]( size_t index ) const {
296  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
297 
298  ElementType res;
299 
300  if( mat_.columns() != 0UL ) {
301  res = mat_(index,0UL) * vec_[0UL];
302  for( size_t j=1UL; j<end_; j+=2UL ) { // columns j and j+1 per iteration
303  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
304  }
305  if( end_ < mat_.columns() ) { // one column left over after the pairwise loop
306  res += mat_(index,end_) * vec_[end_];
307  }
308  }
309  else {
310  reset( res );
311  }
312 
313  return res;
314  }
315  //**********************************************************************************************
316 
317  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of rows of the matrix operand.
322  inline size_t size() const {
323  return mat_.rows();
324  }
325  //**********************************************************************************************
326 
327  //**Left operand access*************************************************************************
// Returns the stored left-hand side (matrix) operand.
332  inline LeftOperand leftOperand() const {
333  return mat_;
334  }
335  //**********************************************************************************************
336 
337  //**Right operand access************************************************************************
// Returns the stored right-hand side (vector) operand.
342  inline RightOperand rightOperand() const {
343  return vec_;
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
// Reports whether the expression can alias with the object at address 'alias'.
// The answer is obtained by asking both operands isAliased( alias ), i.e. the same test that
// isAliased() of this class performs.
353  template< typename T >
354  inline bool canAlias( const T* alias ) const {
355  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
// Reports whether the expression is aliased with the object at address 'alias': true when
// either the matrix operand or the vector operand reports isAliased( alias ).
365  template< typename T >
366  inline bool isAliased( const T* alias ) const {
367  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
368  }
369  //**********************************************************************************************
370 
371  //**********************************************************************************************
// Reports whether both operands report isAligned().
376  inline bool isAligned() const {
377  return mat_.isAligned() && vec_.isAligned();
378  }
379  //**********************************************************************************************
380 
381  //**********************************************************************************************
// Run-time predicate canSMPAssign().
// The visible part of the condition holds when BLAZE_BLAS_IS_PARALLEL is off, when MT is a
// computation that is not pre-evaluated (evaluateMatrix unset), or when rows*columns is below
// TDMATDVECMULT_THRESHOLD.
// NOTE(review): source line 390 is omitted by this listing, so the final operand of the
// trailing '&&' is not visible here and must be read from the header.
386  inline bool canSMPAssign() const {
387  return ( !BLAZE_BLAS_IS_PARALLEL ||
388  ( IsComputation<MT>::value && !evaluateMatrix ) ||
389  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
391  }
392  //**********************************************************************************************
393 
394  private:
395  //**Member variables****************************************************************************
// Stop index of the two-columns-per-step loop in operator[]; set once in the constructor.
// NOTE(review): the declarations of mat_ and vec_ (source lines 396-397) are omitted by this listing.
398  const size_t end_;
399  //**********************************************************************************************
400 
401  //**Assignment to dense vectors*****************************************************************
// Assigns the matrix/vector product 'rhs' to the dense vector 'lhs'.
// Returns immediately for a matrix without rows and resets 'lhs' for a matrix without columns.
// Otherwise both operands are bound as LT / RT objects and the work is handed to
// selectAssignKernel().
// NOTE(review): LT and RT are declared on source lines this listing omits — confirm whether
// they create temporaries or bind references for the given operand types.
414  template< typename VT1 > // Type of the target dense vector
415  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
416  {
418 
419  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
420 
421  if( rhs.mat_.rows() == 0UL ) {
422  return;
423  }
424  else if( rhs.mat_.columns() == 0UL ) {
425  reset( ~lhs );
426  return;
427  }
428 
429  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
430  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
431 
432  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
433  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
434  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
435  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
436 
437  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
438  }
440  //**********************************************************************************************
441 
442  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for y = A * x; this overload takes part in overload resolution when
// UseSMPAssignKernel is not set.
// The default kernel is used when MT is a computation that is not pre-evaluated, or when
// rows*columns is below TDMATDVECMULT_THRESHOLD; in all other cases the BLAS kernel selection
// is used.
453  template< typename VT1 // Type of the left-hand side target vector
454  , typename MT1 // Type of the left-hand side matrix operand
455  , typename VT2 > // Type of the right-hand side vector operand
456  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
457  selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
461  TDMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
462  else
463  TDMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
464  }
466  //**********************************************************************************************
467 
468  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for y = A * x; this overload takes part in overload resolution when
// UseSMPAssignKernel is set. It rebuilds the product from the already bound operands and
// forwards it to smpAssign().
479  template< typename VT1 // Type of the left-hand side target vector
480  , typename MT1 // Type of the left-hand side matrix operand
481  , typename VT2 > // Type of the right-hand side vector operand
482  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
483  selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
484  {
485  smpAssign( y, A * x );
486  }
488  //**********************************************************************************************
489 
490  //**Default assignment to dense vectors*********************************************************
// Scalar default kernel for y = A * x (selected when UseVectorizedDefaultKernel is not set).
// The product is built column by column: column 0 initialises every y[i], and each further
// column j adds x[j] * A(i,j) to y[i]. The accumulation handles two rows per iteration and
// treats an odd last row separately.
// Column 0 is read unconditionally; the friend function assign() that leads here returns
// early for matrices without rows or columns.
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename MT1 // Type of the left-hand side matrix operand
506  , typename VT2 > // Type of the right-hand side vector operand
507  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
508  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
509  {
510  const size_t M( A.rows() );
511  const size_t N( A.columns() );
512 
513  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
514  const size_t iend( M & size_t(-2) ); // M rounded down to an even number
515 
516  for( size_t i=0UL; i<M; ++i ) {
517  y[i] = x[0UL] * A(i,0UL);
518  }
519  for( size_t j=1UL; j<N; ++j ) {
520  for( size_t i=0UL; i<iend; i+=2UL ) {
521  y[i ] += x[j] * A(i ,j);
522  y[i+1UL] += x[j] * A(i+1UL,j);
523  }
524  if( iend < M ) { // odd number of rows: handle the last row
525  y[iend] += x[j] * A(iend,j);
526  }
527  }
528  }
530  //**********************************************************************************************
531 
532  //**Vectorized default assignment to dense vectors**********************************************
// Intrinsics-based default kernel for y = A * x (selected when UseVectorizedDefaultKernel is set).
// The row index i advances in blocks of 8, 4, 3, 2 and finally 1 packed values of IT::size
// elements each. For every block the kernel runs over all columns j, multiplies the packed
// values loaded from column j of A with set( x[j] ) (presumably a broadcast of the scalar —
// confirm), sums them in the xmm accumulators and finally stores the accumulators to y.
// NOTE(review): the accumulators are default-constructed and immediately used as summands, so
// the kernel relies on IntrinsicType's default constructor yielding zero — confirm.
// NOTE(review): the final 'if( i < M )' block loads and stores a full packed value even when
// fewer than IT::size rows remain; this presumably relies on padded storage — confirm.
// IntrinsicType itself is declared on a source line this listing omits.
546  template< typename VT1 // Type of the left-hand side target vector
547  , typename MT1 // Type of the left-hand side matrix operand
548  , typename VT2 > // Type of the right-hand side vector operand
549  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
550  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
551  {
552  typedef IntrinsicTrait<ElementType> IT;
553 
554  const size_t M( A.rows() );
555  const size_t N( A.columns() );
556 
557  size_t i( 0UL );
558 
559  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) { // blocks of 8 packed values
560  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
561  for( size_t j=0UL; j<N; ++j ) {
562  const IntrinsicType x1( set( x[j] ) );
563  xmm1 = xmm1 + A.load(i ,j) * x1;
564  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
565  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
566  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
567  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
568  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
569  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
570  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
571  }
572  y.store( i , xmm1 );
573  y.store( i+IT::size , xmm2 );
574  y.store( i+IT::size*2UL, xmm3 );
575  y.store( i+IT::size*3UL, xmm4 );
576  y.store( i+IT::size*4UL, xmm5 );
577  y.store( i+IT::size*5UL, xmm6 );
578  y.store( i+IT::size*6UL, xmm7 );
579  y.store( i+IT::size*7UL, xmm8 );
580  }
581  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) { // blocks of 4 packed values
582  IntrinsicType xmm1, xmm2, xmm3, xmm4;
583  for( size_t j=0UL; j<N; ++j ) {
584  const IntrinsicType x1( set( x[j] ) );
585  xmm1 = xmm1 + A.load(i ,j) * x1;
586  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
587  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
588  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
589  }
590  y.store( i , xmm1 );
591  y.store( i+IT::size , xmm2 );
592  y.store( i+IT::size*2UL, xmm3 );
593  y.store( i+IT::size*3UL, xmm4 );
594  }
595  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) { // blocks of 3 packed values
596  IntrinsicType xmm1, xmm2, xmm3;
597  for( size_t j=0UL; j<N; ++j ) {
598  const IntrinsicType x1( set( x[j] ) );
599  xmm1 = xmm1 + A.load(i ,j) * x1;
600  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
601  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
602  }
603  y.store( i , xmm1 );
604  y.store( i+IT::size , xmm2 );
605  y.store( i+IT::size*2UL, xmm3 );
606  }
607  for( ; (i+IT::size) < M; i+=IT::size*2UL ) { // blocks of 2 packed values
608  IntrinsicType xmm1, xmm2;
609  for( size_t j=0UL; j<N; ++j ) {
610  const IntrinsicType x1( set( x[j] ) );
611  xmm1 = xmm1 + A.load(i ,j) * x1;
612  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
613  }
614  y.store( i , xmm1 );
615  y.store( i+IT::size, xmm2 );
616  }
617  if( i < M ) { // last packed value
618  IntrinsicType xmm1;
619  for( size_t j=0UL; j<N; ++j ) {
620  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
621  }
622  y.store( i, xmm1 );
623  }
624  }
626  //**********************************************************************************************
627 
628  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS kernel selection for y = A * x: taken when UseDefaultKernel is set
// (BLAS mode off or no matching BLAS kernel) and simply forwards to selectDefaultAssignKernel().
642  template< typename VT1 // Type of the left-hand side target vector
643  , typename MT1 // Type of the left-hand side matrix operand
644  , typename VT2 > // Type of the right-hand side vector operand
645  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
646  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
647  {
648  selectDefaultAssignKernel( y, A, x );
649  }
651  //**********************************************************************************************
652 
653  //**BLAS-based assignment to dense vectors (single precision)***********************************
654 #if BLAZE_BLAS_MODE
655 
// Single precision BLAS kernel for y = A * x (selected when UseSinglePrecisionKernel is set).
// Calls cblas_sgemv for a column-major, non-transposed M x N matrix with alpha = 1 and
// beta = 0, unit strides for x and y, and A.spacing() as leading dimension.
// The size_t dimensions are converted to int through boost::numeric_cast.
668  template< typename VT1 // Type of the left-hand side target vector
669  , typename MT1 // Type of the left-hand side matrix operand
670  , typename VT2 > // Type of the right-hand side vector operand
671  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
672  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
673  {
674  using boost::numeric_cast;
675 
679 
680  const int M ( numeric_cast<int>( A.rows() ) );
681  const int N ( numeric_cast<int>( A.columns() ) );
682  const int lda( numeric_cast<int>( A.spacing() ) );
683 
684  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
685  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
686  }
688 #endif
689  //**********************************************************************************************
690 
691  //**BLAS-based assignment to dense vectors (double precision)***********************************
692 #if BLAZE_BLAS_MODE
693 
// Double precision BLAS kernel for y = A * x (selected when UseDoublePrecisionKernel is set).
// Calls cblas_dgemv for a column-major, non-transposed M x N matrix with alpha = 1 and
// beta = 0, unit strides for x and y, and A.spacing() as leading dimension.
// The size_t dimensions are converted to int through boost::numeric_cast.
706  template< typename VT1 // Type of the left-hand side target vector
707  , typename MT1 // Type of the left-hand side matrix operand
708  , typename VT2 > // Type of the right-hand side vector operand
709  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
710  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
711  {
712  using boost::numeric_cast;
713 
717 
718  const int M ( numeric_cast<int>( A.rows() ) );
719  const int N ( numeric_cast<int>( A.columns() ) );
720  const int lda( numeric_cast<int>( A.spacing() ) );
721 
722  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
723  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
724  }
726 #endif
727  //**********************************************************************************************
728 
729  //**BLAS-based assignment to dense vectors (single precision complex)***************************
730 #if BLAZE_BLAS_MODE
731 
// Single precision complex BLAS kernel for y = A * x (selected when
// UseSinglePrecisionComplexKernel is set).
// Calls cblas_cgemv for a column-major, non-transposed M x N matrix; alpha = (1,0) and
// beta = (0,0) are passed by address. The constraints check that the value_type of all three
// element types is float. The size_t dimensions are converted to int through boost::numeric_cast.
744  template< typename VT1 // Type of the left-hand side target vector
745  , typename MT1 // Type of the left-hand side matrix operand
746  , typename VT2 > // Type of the right-hand side vector operand
747  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
748  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
749  {
750  using boost::numeric_cast;
751 
755  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
756  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
757  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
758 
759  const int M ( numeric_cast<int>( A.rows() ) );
760  const int N ( numeric_cast<int>( A.columns() ) );
761  const int lda( numeric_cast<int>( A.spacing() ) );
762  const complex<float> alpha( 1.0F, 0.0F );
763  const complex<float> beta ( 0.0F, 0.0F );
764 
765  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
766  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
767  }
769 #endif
770  //**********************************************************************************************
771 
772  //**BLAS-based assignment to dense vectors (double precision complex)***************************
773 #if BLAZE_BLAS_MODE
774 
// Double precision complex BLAS kernel for y = A * x (selected when
// UseDoublePrecisionComplexKernel is set).
// Calls cblas_zgemv for a column-major, non-transposed M x N matrix; alpha = (1,0) and
// beta = (0,0) are passed by address. The constraints check that the value_type of all three
// element types is double. The size_t dimensions are converted to int through boost::numeric_cast.
787  template< typename VT1 // Type of the left-hand side target vector
788  , typename MT1 // Type of the left-hand side matrix operand
789  , typename VT2 > // Type of the right-hand side vector operand
790  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
791  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
792  {
793  using boost::numeric_cast;
794 
798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
800  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
801 
802  const int M ( numeric_cast<int>( A.rows() ) );
803  const int N ( numeric_cast<int>( A.columns() ) );
804  const int lda( numeric_cast<int>( A.spacing() ) );
805  const complex<double> alpha( 1.0, 0.0 );
806  const complex<double> beta ( 0.0, 0.0 );
807 
808  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
809  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
810  }
812 #endif
813  //**********************************************************************************************
814 
815  //**Assignment to sparse vectors****************************************************************
// Assigns the matrix/vector product 'rhs' to the sparse vector 'lhs'.
// The expression is first evaluated into a temporary of type ResultType, which is then passed
// to smpAssign(). Matching sizes are only checked through BLAZE_INTERNAL_ASSERT.
828  template< typename VT1 > // Type of the target sparse vector
829  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
830  {
832 
836 
837  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
838 
839  const ResultType tmp( rhs );
840  smpAssign( ~lhs, tmp );
841  }
843  //**********************************************************************************************
844 
845  //**Addition assignment to dense vectors********************************************************
// Adds the matrix/vector product 'rhs' to the dense vector 'lhs' (lhs += A * x).
// Returns immediately when the matrix has no rows or no columns, leaving 'lhs' unchanged.
// Otherwise both operands are bound as LT / RT objects and the work is handed to
// selectAddAssignKernel().
// NOTE(review): LT and RT are declared on source lines this listing omits — confirm whether
// they create temporaries or bind references for the given operand types.
858  template< typename VT1 > // Type of the target dense vector
859  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
860  {
862 
863  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
864 
865  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
866  return;
867  }
868 
869  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
870  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
871 
872  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
873  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
874  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
875  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
876 
877  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
878  }
880  //**********************************************************************************************
881 
882  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for y += A * x; this overload takes part in overload resolution when
// UseSMPAssignKernel is not set.
// The default kernel is used when MT is a computation that is not pre-evaluated, or when
// rows*columns is below TDMATDVECMULT_THRESHOLD; in all other cases the BLAS kernel selection
// is used.
893  template< typename VT1 // Type of the left-hand side target vector
894  , typename MT1 // Type of the left-hand side matrix operand
895  , typename VT2 > // Type of the right-hand side vector operand
896  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
897  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
898  {
899  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
900  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
901  TDMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
902  else
903  TDMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
904  }
906  //**********************************************************************************************
907 
908  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for y += A * x; this overload takes part in overload resolution when
// UseSMPAssignKernel is set. It rebuilds the product from the already bound operands and
// forwards it to smpAddAssign().
919  template< typename VT1 // Type of the left-hand side target vector
920  , typename MT1 // Type of the left-hand side matrix operand
921  , typename VT2 > // Type of the right-hand side vector operand
922  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
923  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
924  {
925  smpAddAssign( y, A * x );
926  }
928  //**********************************************************************************************
929 
930  //**Default addition assignment to dense vectors************************************************
// Scalar default kernel for y += A * x (selected when UseVectorizedDefaultKernel is not set).
// Every column j, starting at column 0, adds x[j] * A(i,j) to the existing y[i]. The
// accumulation handles two rows per iteration and treats an odd last row separately.
944  template< typename VT1 // Type of the left-hand side target vector
945  , typename MT1 // Type of the left-hand side matrix operand
946  , typename VT2 > // Type of the right-hand side vector operand
947  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
948  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
949  {
950  const size_t M( A.rows() );
951  const size_t N( A.columns() );
952 
953  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
954  const size_t iend( M & size_t(-2) ); // M rounded down to an even number
955 
956  for( size_t j=0UL; j<N; ++j ) {
957  for( size_t i=0UL; i<iend; i+=2UL ) {
958  y[i ] += x[j] * A(i ,j);
959  y[i+1UL] += x[j] * A(i+1UL,j);
960  }
961  if( iend < M ) { // odd number of rows: handle the last row
962  y[iend] += x[j] * A(iend,j);
963  }
964  }
965  }
967  //**********************************************************************************************
968 
969  //**Vectorized default addition assignment to dense vectors*************************************
// Intrinsics-based default kernel for y += A * x (selected when UseVectorizedDefaultKernel is set).
// The row index i advances in blocks of 8, 4, 3, 2 and finally 1 packed values of IT::size
// elements each. For every block the accumulators are first loaded from y, then for all
// columns j the packed values of column j of A are multiplied with set( x[j] ) (presumably a
// broadcast of the scalar — confirm) and added, and finally the accumulators are stored to y.
// NOTE(review): the final 'if( i < M )' block loads and stores a full packed value even when
// fewer than IT::size rows remain; this presumably relies on padded storage — confirm.
// IntrinsicType itself is declared on a source line this listing omits.
983  template< typename VT1 // Type of the left-hand side target vector
984  , typename MT1 // Type of the left-hand side matrix operand
985  , typename VT2 > // Type of the right-hand side vector operand
986  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
987  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
988  {
989  typedef IntrinsicTrait<ElementType> IT;
990 
991  const size_t M( A.rows() );
992  const size_t N( A.columns() );
993 
994  size_t i( 0UL );
995 
996  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) { // blocks of 8 packed values
997  IntrinsicType xmm1( y.load(i ) );
998  IntrinsicType xmm2( y.load(i+IT::size ) );
999  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1000  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1001  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1002  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1003  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1004  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1005  for( size_t j=0UL; j<N; ++j ) {
1006  const IntrinsicType x1( set( x[j] ) );
1007  xmm1 = xmm1 + A.load(i ,j) * x1;
1008  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1009  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1010  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1011  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1012  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1013  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1014  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1015  }
1016  y.store( i , xmm1 );
1017  y.store( i+IT::size , xmm2 );
1018  y.store( i+IT::size*2UL, xmm3 );
1019  y.store( i+IT::size*3UL, xmm4 );
1020  y.store( i+IT::size*4UL, xmm5 );
1021  y.store( i+IT::size*5UL, xmm6 );
1022  y.store( i+IT::size*6UL, xmm7 );
1023  y.store( i+IT::size*7UL, xmm8 );
1024  }
1025  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) { // blocks of 4 packed values
1026  IntrinsicType xmm1( y.load(i ) );
1027  IntrinsicType xmm2( y.load(i+IT::size ) );
1028  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1029  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1030  for( size_t j=0UL; j<N; ++j ) {
1031  const IntrinsicType x1( set( x[j] ) );
1032  xmm1 = xmm1 + A.load(i ,j) * x1;
1033  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1034  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1035  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1036  }
1037  y.store( i , xmm1 );
1038  y.store( i+IT::size , xmm2 );
1039  y.store( i+IT::size*2UL, xmm3 );
1040  y.store( i+IT::size*3UL, xmm4 );
1041  }
1042  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) { // blocks of 3 packed values
1043  IntrinsicType xmm1( y.load(i ) );
1044  IntrinsicType xmm2( y.load(i+IT::size ) );
1045  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1046  for( size_t j=0UL; j<N; ++j ) {
1047  const IntrinsicType x1( set( x[j] ) );
1048  xmm1 = xmm1 + A.load(i ,j) * x1;
1049  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1050  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1051  }
1052  y.store( i , xmm1 );
1053  y.store( i+IT::size , xmm2 );
1054  y.store( i+IT::size*2UL, xmm3 );
1055  }
1056  for( ; (i+IT::size) < M; i+=IT::size*2UL ) { // blocks of 2 packed values
1057  IntrinsicType xmm1( y.load(i ) );
1058  IntrinsicType xmm2( y.load(i+IT::size) );
1059  for( size_t j=0UL; j<N; ++j ) {
1060  const IntrinsicType x1( set( x[j] ) );
1061  xmm1 = xmm1 + A.load(i ,j) * x1;
1062  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1063  }
1064  y.store( i , xmm1 );
1065  y.store( i+IT::size, xmm2 );
1066  }
1067  if( i < M ) { // last packed value
1068  IntrinsicType xmm1( y.load(i) );
1069  for( size_t j=0UL; j<N; ++j ) {
1070  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1071  }
1072  y.store( i, xmm1 );
1073  }
1074  }
1076  //**********************************************************************************************
1077 
1078  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback of the BLAS kernel selection for y += A * x: taken when UseDefaultKernel is set
// (BLAS mode off or no matching BLAS kernel) and simply forwards to
// selectDefaultAddAssignKernel().
1092  template< typename VT1 // Type of the left-hand side target vector
1093  , typename MT1 // Type of the left-hand side matrix operand
1094  , typename VT2 > // Type of the right-hand side vector operand
1095  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1096  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1097  {
1098  selectDefaultAddAssignKernel( y, A, x );
1099  }
1101  //**********************************************************************************************
1102 
1103  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1104 #if BLAZE_BLAS_MODE
1105 
// Single precision BLAS kernel for y += A * x (selected when UseSinglePrecisionKernel is set).
// Calls cblas_sgemv for a column-major, non-transposed M x N matrix with alpha = 1 and
// beta = 1 (the beta of 1 is what keeps the previous content of y in the sum), unit strides
// for x and y, and A.spacing() as leading dimension.
// The size_t dimensions are converted to int through boost::numeric_cast.
1118  template< typename VT1 // Type of the left-hand side target vector
1119  , typename MT1 // Type of the left-hand side matrix operand
1120  , typename VT2 > // Type of the right-hand side vector operand
1121  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1122  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1123  {
1124  using boost::numeric_cast;
1125 
1129 
1130  const int M ( numeric_cast<int>( A.rows() ) );
1131  const int N ( numeric_cast<int>( A.columns() ) );
1132  const int lda( numeric_cast<int>( A.spacing() ) );
1133 
1134  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
1135  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1136  }
1138 #endif
1139  //**********************************************************************************************
1140 
1141  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1142 #if BLAZE_BLAS_MODE
1143 
1156  template< typename VT1 // Type of the left-hand side target vector
1157  , typename MT1 // Type of the left-hand side matrix operand
1158  , typename VT2 > // Type of the right-hand side vector operand
1159  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1160  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1161  {
1162  using boost::numeric_cast;
1163 
1167 
1168  const int M ( numeric_cast<int>( A.rows() ) );
1169  const int N ( numeric_cast<int>( A.columns() ) );
1170  const int lda( numeric_cast<int>( A.spacing() ) );
1171 
1172  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1173  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1174  }
1176 #endif
1177  //**********************************************************************************************
1178 
1179  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1180 #if BLAZE_BLAS_MODE
1181 
1194  template< typename VT1 // Type of the left-hand side target vector
1195  , typename MT1 // Type of the left-hand side matrix operand
1196  , typename VT2 > // Type of the right-hand side vector operand
1197  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1198  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1199  {
1200  using boost::numeric_cast;
1201 
1205  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1206  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1207  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1208 
1209  const int M ( numeric_cast<int>( A.rows() ) );
1210  const int N ( numeric_cast<int>( A.columns() ) );
1211  const int lda( numeric_cast<int>( A.spacing() ) );
1212  const complex<float> alpha( 1.0F, 0.0F );
1213  const complex<float> beta ( 1.0F, 0.0F );
1214 
1215  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1216  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1217  }
1219 #endif
1220  //**********************************************************************************************
1221 
1222  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1223 #if BLAZE_BLAS_MODE
1224 
1237  template< typename VT1 // Type of the left-hand side target vector
1238  , typename MT1 // Type of the left-hand side matrix operand
1239  , typename VT2 > // Type of the right-hand side vector operand
1240  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1241  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1242  {
1243  using boost::numeric_cast;
1244 
1248  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1249  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1250  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1251 
1252  const int M ( numeric_cast<int>( A.rows() ) );
1253  const int N ( numeric_cast<int>( A.columns() ) );
1254  const int lda( numeric_cast<int>( A.spacing() ) );
1255  const complex<double> alpha( 1.0, 0.0 );
1256  const complex<double> beta ( 1.0, 0.0 );
1257 
1258  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1259  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1260  }
1262 #endif
1263  //**********************************************************************************************
1264 
1265  //**Addition assignment to sparse vectors*******************************************************
1266  // No special implementation for the addition assignment to sparse vectors.
1267  //**********************************************************************************************
1268 
1269  //**Subtraction assignment to dense vectors*****************************************************
1282  template< typename VT1 > // Type of the target dense vector
1283  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1284  {
1286 
1287  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1288 
1289  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1290  return;
1291  }
1292 
1293  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1294  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1295 
1296  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1297  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1298  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1299  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1300 
1301  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1302  }
1304  //**********************************************************************************************
1305 
1306  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1317  template< typename VT1 // Type of the left-hand side target vector
1318  , typename MT1 // Type of the left-hand side matrix operand
1319  , typename VT2 > // Type of the right-hand side vector operand
1320  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
1321  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1322  {
1323  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1324  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1325  TDMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1326  else
1327  TDMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
1328  }
1330  //**********************************************************************************************
1331 
1332  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1343  template< typename VT1 // Type of the left-hand side target vector
1344  , typename MT1 // Type of the left-hand side matrix operand
1345  , typename VT2 > // Type of the right-hand side vector operand
1346  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
1347  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1348  {
1349  smpSubAssign( y, A * x );
1350  }
1352  //**********************************************************************************************
1353 
1354  //**Default subtraction assignment to dense vectors*********************************************
1368  template< typename VT1 // Type of the left-hand side target vector
1369  , typename MT1 // Type of the left-hand side matrix operand
1370  , typename VT2 > // Type of the right-hand side vector operand
1371  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1372  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1373  {
1374  const size_t M( A.rows() );
1375  const size_t N( A.columns() );
1376 
1377  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1378  const size_t iend( M & size_t(-2) );
1379 
1380  for( size_t j=0UL; j<N; ++j ) {
1381  for( size_t i=0UL; i<iend; i+=2UL ) {
1382  y[i ] -= x[j] * A(i ,j);
1383  y[i+1UL] -= x[j] * A(i+1UL,j);
1384  }
1385  if( iend < M ) {
1386  y[iend] -= x[j] * A(iend,j);
1387  }
1388  }
1389  }
1391  //**********************************************************************************************
1392 
1393  //**Vectorized default subtraction assignment to dense vectors**********************************
1407  template< typename VT1 // Type of the left-hand side target vector
1408  , typename MT1 // Type of the left-hand side matrix operand
1409  , typename VT2 > // Type of the right-hand side vector operand
1410  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1411  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1412  {
1413  typedef IntrinsicTrait<ElementType> IT;
1414 
1415  const size_t M( A.rows() );
1416  const size_t N( A.columns() );
1417 
1418  size_t i( 0UL );
1419 
1420  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1421  IntrinsicType xmm1( y.load(i ) );
1422  IntrinsicType xmm2( y.load(i+IT::size ) );
1423  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1424  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1425  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1426  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1427  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1428  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1429  for( size_t j=0UL; j<N; ++j ) {
1430  const IntrinsicType x1( set( x[j] ) );
1431  xmm1 = xmm1 - A.load(i ,j) * x1;
1432  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1433  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1434  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1435  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1436  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1437  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1438  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1439  }
1440  y.store( i , xmm1 );
1441  y.store( i+IT::size , xmm2 );
1442  y.store( i+IT::size*2UL, xmm3 );
1443  y.store( i+IT::size*3UL, xmm4 );
1444  y.store( i+IT::size*4UL, xmm5 );
1445  y.store( i+IT::size*5UL, xmm6 );
1446  y.store( i+IT::size*6UL, xmm7 );
1447  y.store( i+IT::size*7UL, xmm8 );
1448  }
1449  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1450  IntrinsicType xmm1( y.load(i ) );
1451  IntrinsicType xmm2( y.load(i+IT::size ) );
1452  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1453  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1454  for( size_t j=0UL; j<N; ++j ) {
1455  const IntrinsicType x1( set( x[j] ) );
1456  xmm1 = xmm1 - A.load(i ,j) * x1;
1457  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1458  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1459  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1460  }
1461  y.store( i , xmm1 );
1462  y.store( i+IT::size , xmm2 );
1463  y.store( i+IT::size*2UL, xmm3 );
1464  y.store( i+IT::size*3UL, xmm4 );
1465  }
1466  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1467  IntrinsicType xmm1( y.load(i ) );
1468  IntrinsicType xmm2( y.load(i+IT::size ) );
1469  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1470  for( size_t j=0UL; j<N; ++j ) {
1471  const IntrinsicType x1( set( x[j] ) );
1472  xmm1 = xmm1 - A.load(i ,j) * x1;
1473  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1474  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1475  }
1476  y.store( i , xmm1 );
1477  y.store( i+IT::size , xmm2 );
1478  y.store( i+IT::size*2UL, xmm3 );
1479  }
1480  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1481  IntrinsicType xmm1( y.load(i ) );
1482  IntrinsicType xmm2( y.load(i+IT::size) );
1483  for( size_t j=0UL; j<N; ++j ) {
1484  const IntrinsicType x1( set( x[j] ) );
1485  xmm1 = xmm1 - A.load(i ,j) * x1;
1486  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
1487  }
1488  y.store( i , xmm1 );
1489  y.store( i+IT::size, xmm2 );
1490  }
1491  if( i < M ) {
1492  IntrinsicType xmm1( y.load(i) );
1493  for( size_t j=0UL; j<N; ++j ) {
1494  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
1495  }
1496  y.store( i, xmm1 );
1497  }
1498  }
1500  //**********************************************************************************************
1501 
1502  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1516  template< typename VT1 // Type of the left-hand side target vector
1517  , typename MT1 // Type of the left-hand side matrix operand
1518  , typename VT2 > // Type of the right-hand side vector operand
1519  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1520  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1521  {
1522  selectDefaultSubAssignKernel( y, A, x );
1523  }
1525  //**********************************************************************************************
1526 
1527  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1528 #if BLAZE_BLAS_MODE
1529 
1542  template< typename VT1 // Type of the left-hand side target vector
1543  , typename MT1 // Type of the left-hand side matrix operand
1544  , typename VT2 > // Type of the right-hand side vector operand
1545  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1546  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1547  {
1548  using boost::numeric_cast;
1549 
1553 
1554  const int M ( numeric_cast<int>( A.rows() ) );
1555  const int N ( numeric_cast<int>( A.columns() ) );
1556  const int lda( numeric_cast<int>( A.spacing() ) );
1557 
1558  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1559  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1560  }
1562 #endif
1563  //**********************************************************************************************
1564 
1565  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1566 #if BLAZE_BLAS_MODE
1567 
1580  template< typename VT1 // Type of the left-hand side target vector
1581  , typename MT1 // Type of the left-hand side matrix operand
1582  , typename VT2 > // Type of the right-hand side vector operand
1583  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1584  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1585  {
1586  using boost::numeric_cast;
1587 
1591 
1592  const int M ( numeric_cast<int>( A.rows() ) );
1593  const int N ( numeric_cast<int>( A.columns() ) );
1594  const int lda( numeric_cast<int>( A.spacing() ) );
1595 
1596  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1597  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1598  }
1600 #endif
1601  //**********************************************************************************************
1602 
1603  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1604 #if BLAZE_BLAS_MODE
1605 
1618  template< typename VT1 // Type of the left-hand side target vector
1619  , typename MT1 // Type of the left-hand side matrix operand
1620  , typename VT2 > // Type of the right-hand side vector operand
1621  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1622  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1623  {
1624  using boost::numeric_cast;
1625 
1629  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1631  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1632 
1633  const int M ( numeric_cast<int>( A.rows() ) );
1634  const int N ( numeric_cast<int>( A.columns() ) );
1635  const int lda( numeric_cast<int>( A.spacing() ) );
1636  const complex<float> alpha( -1.0F, 0.0F );
1637  const complex<float> beta ( 1.0F, 0.0F );
1638 
1639  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1640  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1641  }
1643 #endif
1644  //**********************************************************************************************
1645 
1646  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1647 #if BLAZE_BLAS_MODE
1648 
1661  template< typename VT1 // Type of the left-hand side target vector
1662  , typename MT1 // Type of the left-hand side matrix operand
1663  , typename VT2 > // Type of the right-hand side vector operand
1664  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1665  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1666  {
1667  using boost::numeric_cast;
1668 
1672  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1674  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1675 
1676  const int M ( numeric_cast<int>( A.rows() ) );
1677  const int N ( numeric_cast<int>( A.columns() ) );
1678  const int lda( numeric_cast<int>( A.spacing() ) );
1679  const complex<double> alpha( -1.0, 0.0 );
1680  const complex<double> beta ( 1.0, 0.0 );
1681 
1682  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1683  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1684  }
1686 #endif
1687  //**********************************************************************************************
1688 
1689  //**Subtraction assignment to sparse vectors****************************************************
1690  // No special implementation for the subtraction assignment to sparse vectors.
1691  //**********************************************************************************************
1692 
1693  //**Multiplication assignment to dense vectors**************************************************
1706  template< typename VT1 > // Type of the target dense vector
1707  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1708  {
1710 
1714 
1715  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1716 
1717  const ResultType tmp( rhs );
1718  multAssign( ~lhs, tmp );
1719  }
1721  //**********************************************************************************************
1722 
1723  //**Multiplication assignment to sparse vectors*************************************************
1724  // No special implementation for the multiplication assignment to sparse vectors.
1725  //**********************************************************************************************
1726 
1727  //**Compile time checks*************************************************************************
1734  //**********************************************************************************************
1735 };
1736 //*************************************************************************************************
1737 
1738 
1739 
1740 
1741 //=================================================================================================
1742 //
1743 // DVECSCALARMULTEXPR SPECIALIZATION
1744 //
1745 //=================================================================================================
1746 
1747 //*************************************************************************************************
1756 template< typename MT // Type of the left-hand side dense matrix
1757  , typename VT // Type of the right-hand side dense vector
1758  , typename ST > // Type of the side scalar value
1759 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1760  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1761  , private VecScalarMultExpr
1762  , private Computation
1763 {
1764  private:
1765  //**Type definitions****************************************************************************
1766  typedef TDMatDVecMultExpr<MT,VT> MVM;  //!< Type of the wrapped dense matrix/dense vector multiplication expression.
1767  typedef typename MVM::ResultType RES;  //!< Result type of the wrapped multiplication expression.
1768  typedef typename MT::ResultType MRT;  //!< Result type of the left-hand side dense matrix expression.
1769  typedef typename VT::ResultType VRT;  //!< Result type of the right-hand side dense vector expression.
1770  typedef typename MRT::ElementType MET;  //!< Element type of the matrix result type.
1771  typedef typename VRT::ElementType VET;  //!< Element type of the vector result type.
1772  typedef typename MT::CompositeType MCT;  //!< Composite type of the left-hand side dense matrix expression.
1773  typedef typename VT::CompositeType VCT;  //!< Composite type of the right-hand side dense vector expression.
1774  //**********************************************************************************************
1775 
1776  //**********************************************************************************************
// Compile-time switch for the matrix operand. It is non-zero if the matrix type requires an
// evaluation, or if it is a computation whose element type equals the vector element type
// and is BLAS compatible. The switch selects 'const MRT' instead of MCT for the LT typedef
// of this class, and it takes part in the kernel selection and in smpAssignable.
1778  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1779  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1780  //**********************************************************************************************
1781 
1782  //**********************************************************************************************
// Compile-time switch for the vector operand. It is non-zero if the vector type is a
// computation or requires an evaluation. The switch selects 'const VRT' instead of VCT for
// the RT typedef of this class and takes part in smpAssignable.
1784  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1785  //**********************************************************************************************
1786 
1787  //**********************************************************************************************
1789 
1791  template< typename T1, typename T2, typename T3, typename T4 >
1792  struct UseSMPAssignKernel {
1793  enum { value = evaluateMatrix || evaluateVector };
1794  };
1795  //**********************************************************************************************
1796 
1797  //**********************************************************************************************
1799 
1802  template< typename T1, typename T2, typename T3, typename T4 >
1803  struct UseSinglePrecisionKernel {
1804  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1805  IsFloat<typename T1::ElementType>::value &&
1806  IsFloat<typename T2::ElementType>::value &&
1807  IsFloat<typename T3::ElementType>::value &&
1808  !IsComplex<T4>::value };
1809  };
1810  //**********************************************************************************************
1811 
1812  //**********************************************************************************************
1814 
1817  template< typename T1, typename T2, typename T3, typename T4 >
1818  struct UseDoublePrecisionKernel {
1819  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1820  IsDouble<typename T1::ElementType>::value &&
1821  IsDouble<typename T2::ElementType>::value &&
1822  IsDouble<typename T3::ElementType>::value &&
1823  !IsComplex<T4>::value };
1824  };
1825  //**********************************************************************************************
1826 
1827  //**********************************************************************************************
1829 
1832  template< typename T1, typename T2, typename T3 >
1833  struct UseSinglePrecisionComplexKernel {
1834  typedef complex<float> Type;
1835  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1836  IsSame<typename T1::ElementType,Type>::value &&
1837  IsSame<typename T2::ElementType,Type>::value &&
1838  IsSame<typename T3::ElementType,Type>::value };
1839  };
1840  //**********************************************************************************************
1841 
1842  //**********************************************************************************************
1844 
1847  template< typename T1, typename T2, typename T3 >
1848  struct UseDoublePrecisionComplexKernel {
1849  typedef complex<double> Type;
1850  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1851  IsSame<typename T1::ElementType,Type>::value &&
1852  IsSame<typename T2::ElementType,Type>::value &&
1853  IsSame<typename T3::ElementType,Type>::value };
1854  };
1855  //**********************************************************************************************
1856 
1857  //**********************************************************************************************
1859 
1861  template< typename T1, typename T2, typename T3, typename T4 >
1862  struct UseDefaultKernel {
1863  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1864  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1865  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1866  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1867  };
1868  //**********************************************************************************************
1869 
1870  //**********************************************************************************************
1872 
1875  template< typename T1, typename T2, typename T3, typename T4 >
1876  struct UseVectorizedDefaultKernel {
1877  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1878  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1879  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1880  IsSame<typename T1::ElementType,T4>::value &&
1881  IntrinsicTrait<typename T1::ElementType>::addition &&
1882  IntrinsicTrait<typename T1::ElementType>::multiplication };
1883  };
1884  //**********************************************************************************************
1885 
1886  public:
1887  //**Type definitions****************************************************************************
1888  typedef DVecScalarMultExpr<MVM,ST,false> This;  //!< Type of this DVecScalarMultExpr specialization.
1889  typedef typename MultTrait<RES,ST>::Type ResultType;  //!< Result type as determined by MultTrait for RES and ST.
1890  typedef typename ResultType::TransposeType TransposeType;  //!< Transpose type of the result type.
1891  typedef typename ResultType::ElementType ElementType;  //!< Element type of the result type.
1892  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  //!< Intrinsic type associated with the element type.
1893  typedef const ElementType ReturnType;  //!< Return type of the subscript operator (by value).
1894  typedef const ResultType CompositeType;  //!< Type used when this expression is an operand of another expression.
1895 
// Type of the left-hand side operand: the wrapped matrix/vector multiplication expression.
1897  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
1898 
// Type of the right-hand side operand: the scalar value.
1900  typedef ST RightOperand;
1901 
// Type the matrix operand is bound to inside the assignment functions: 'const MRT' if
// evaluateMatrix is set, the composite type MCT otherwise.
1903  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
1904 
// Type the vector operand is bound to inside the assignment functions: 'const VRT' if
// evaluateVector is set, the composite type VCT otherwise.
1906  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
1907  //**********************************************************************************************
1908 
1909  //**Compilation flags***************************************************************************
// Compilation flag for intrinsic optimization. It is non-zero if both operand types are
// vectorizable, the matrix element type equals both the vector element type and the scalar
// type, and IntrinsicTrait reports addition and multiplication for that element type.
1911  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
1912  IsSame<MET,VET>::value &&
1913  IsSame<MET,ST>::value &&
1914  IntrinsicTrait<MET>::addition &&
1915  IntrinsicTrait<MET>::multiplication };
1916 
// Compilation flag for SMP assignments. It is non-zero only if neither the matrix operand
// nor the vector operand has to be evaluated into a temporary.
1918  enum { smpAssignable = !evaluateMatrix && !evaluateVector };
1919  //**********************************************************************************************
1920 
1921  //**Constructor*********************************************************************************
1927  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1928  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1929  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1930  {}
1931  //**********************************************************************************************
1932 
1933  //**Subscript operator**************************************************************************
1939  inline ReturnType operator[]( size_t index ) const {
1940  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1941  return vector_[index] * scalar_;
1942  }
1943  //**********************************************************************************************
1944 
1945  //**Size function*******************************************************************************
1950  inline size_t size() const {
1951  return vector_.size();
1952  }
1953  //**********************************************************************************************
1954 
1955  //**Left operand access*************************************************************************
1960  inline LeftOperand leftOperand() const {
1961  return vector_;
1962  }
1963  //**********************************************************************************************
1964 
1965  //**Right operand access************************************************************************
1970  inline RightOperand rightOperand() const {
1971  return scalar_;
1972  }
1973  //**********************************************************************************************
1974 
1975  //**********************************************************************************************
1981  template< typename T >
1982  inline bool canAlias( const T* alias ) const {
1983  return vector_.canAlias( alias );
1984  }
1985  //**********************************************************************************************
1986 
1987  //**********************************************************************************************
1993  template< typename T >
1994  inline bool isAliased( const T* alias ) const {
1995  return vector_.isAliased( alias );
1996  }
1997  //**********************************************************************************************
1998 
1999  //**********************************************************************************************
2004  inline bool isAligned() const {
2005  return vector_.isAligned();
2006  }
2007  //**********************************************************************************************
2008 
2009  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
//
// The visible first factor of the conjunction holds if BLAS is not parallel, or if the
// matrix operand is an unevaluated computation, or if the number of matrix elements is below
// TDMATDVECMULT_THRESHOLD.
//
// NOTE(review): this listing is incomplete. The return expression ends in a dangling '&&'
// because line 2019 of the original header (the second factor of the conjunction) is missing
// from this extract. Restore that line from the upstream header; the operand cannot be
// derived from the code shown here.
2014  inline bool canSMPAssign() const {
2015  typename MVM::LeftOperand A( vector_.leftOperand() );
2016  return ( !BLAZE_BLAS_IS_PARALLEL ||
2017  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2018  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2020  }
2021  //**********************************************************************************************
2022 
2023  private:
2024  //**Member variables****************************************************************************
2025  LeftOperand vector_;  //!< Left-hand side dense vector of the multiplication expression.
2026  RightOperand scalar_;  //!< Right-hand side scalar of the multiplication expression.
2027  //**********************************************************************************************
2028 
2029  //**Assignment to dense vectors*****************************************************************
2041  template< typename VT1 > // Type of the target dense vector
2042  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2043  {
2045 
2046  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2047 
2048  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2049  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2050 
2051  if( left.rows() == 0UL ) {
2052  return;
2053  }
2054  else if( left.columns() == 0UL ) {
2055  reset( ~lhs );
2056  return;
2057  }
2058 
2059  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2060  RT x( right ); // Evaluation of the right-hand side dense vector operand
2061 
2062  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2063  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2064  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2065  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2066 
2067  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2068  }
2069  //**********************************************************************************************
2070 
2071  //**Assignment to dense vectors (kernel selection)**********************************************
2082  template< typename VT1 // Type of the left-hand side target vector
2083  , typename MT1 // Type of the left-hand side matrix operand
2084  , typename VT2 // Type of the right-hand side vector operand
2085  , typename ST2 > // Type of the scalar value
2086  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2087  selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2088  {
2089  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2090  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2091  DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2092  else
2093  DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
2094  }
2095  //**********************************************************************************************
2096 
2097  //**Assignment to dense vectors (kernel selection)**********************************************
2108  template< typename VT1 // Type of the left-hand side target vector
2109  , typename MT1 // Type of the left-hand side matrix operand
2110  , typename VT2 // Type of the right-hand side vector operand
2111  , typename ST2 > // Type of the scalar value
2112  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2113  selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2114  {
2115  smpAssign( y, A * x * scalar );
2116  }
2117  //**********************************************************************************************
2118 
2119  //**Default assignment to dense vectors*********************************************************
2133  template< typename VT1 // Type of the left-hand side target vector
2134  , typename MT1 // Type of the left-hand side matrix operand
2135  , typename VT2 // Type of the right-hand side vector operand
2136  , typename ST2 > // Type of the scalar value
2137  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2138  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2139  {
2140  const size_t M( A.rows() );
2141  const size_t N( A.columns() );
2142 
2143  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
2144  const size_t iend( M & size_t(-2) );
2145 
2146  for( size_t i=0UL; i<M; ++i ) {
2147  y[i] = x[0UL] * A(i,0UL);
2148  }
2149  for( size_t j=1UL; j<N; ++j ) {
2150  for( size_t i=0UL; i<iend; i+=2UL ) {
2151  y[i ] += x[j] * A(i ,j);
2152  y[i+1UL] += x[j] * A(i+1UL,j);
2153  }
2154  if( iend < M ) {
2155  y[iend] += x[j] * A(iend,j);
2156  }
2157  }
2158  for( size_t i=0UL; i<M; ++i ) {
2159  y[i] *= scalar;
2160  }
2161  }
2162  //**********************************************************************************************
2163 
2164  //**Vectorized default assignment to dense vectors**********************************************
2178  template< typename VT1 // Type of the left-hand side target vector
2179  , typename MT1 // Type of the left-hand side matrix operand
2180  , typename VT2 // Type of the right-hand side vector operand
2181  , typename ST2 > // Type of the scalar value
2182  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2183  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2184  {
2185  typedef IntrinsicTrait<ElementType> IT;
2186 
2187  const size_t M( A.rows() );
2188  const size_t N( A.columns() );
2189 
2190  const IntrinsicType factor( set( scalar ) );
2191 
2192  size_t i( 0UL );
2193 
2194  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2195  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2196  for( size_t j=0UL; j<N; ++j ) {
2197  const IntrinsicType x1( set( x[j] ) );
2198  xmm1 = xmm1 + A.load(i ,j) * x1;
2199  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2200  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2201  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2202  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2203  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2204  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2205  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2206  }
2207  y.store( i , xmm1*factor );
2208  y.store( i+IT::size , xmm2*factor );
2209  y.store( i+IT::size*2UL, xmm3*factor );
2210  y.store( i+IT::size*3UL, xmm4*factor );
2211  y.store( i+IT::size*4UL, xmm5*factor );
2212  y.store( i+IT::size*5UL, xmm6*factor );
2213  y.store( i+IT::size*6UL, xmm7*factor );
2214  y.store( i+IT::size*7UL, xmm8*factor );
2215  }
2216  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2217  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2218  for( size_t j=0UL; j<N; ++j ) {
2219  const IntrinsicType x1( set( x[j] ) );
2220  xmm1 = xmm1 + A.load(i ,j) * x1;
2221  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2222  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2223  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2224  }
2225  y.store( i , xmm1*factor );
2226  y.store( i+IT::size , xmm2*factor );
2227  y.store( i+IT::size*2UL, xmm3*factor );
2228  y.store( i+IT::size*3UL, xmm4*factor );
2229  }
2230  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2231  IntrinsicType xmm1, xmm2, xmm3;
2232  for( size_t j=0UL; j<N; ++j ) {
2233  const IntrinsicType x1( set( x[j] ) );
2234  xmm1 = xmm1 + A.load(i ,j) * x1;
2235  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2236  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2237  }
2238  y.store( i , xmm1*factor );
2239  y.store( i+IT::size , xmm2*factor );
2240  y.store( i+IT::size*2UL, xmm3*factor );
2241  }
2242  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2243  IntrinsicType xmm1, xmm2;
2244  for( size_t j=0UL; j<N; ++j ) {
2245  const IntrinsicType x1( set( x[j] ) );
2246  xmm1 = xmm1 + A.load(i ,j) * x1;
2247  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2248  }
2249  y.store( i , xmm1*factor );
2250  y.store( i+IT::size, xmm2*factor );
2251  }
2252  if( i < M ) {
2253  IntrinsicType xmm1;
2254  for( size_t j=0UL; j<N; ++j ) {
2255  const IntrinsicType x1( set( x[j] ) );
2256  xmm1 = xmm1 + A.load(i,j) * x1;
2257  }
2258  y.store( i, xmm1*factor );
2259  }
2260  }
2261  //**********************************************************************************************
2262 
2263  //**BLAS-based assignment to dense vectors (default)********************************************
2277  template< typename VT1 // Type of the left-hand side target vector
2278  , typename MT1 // Type of the left-hand side matrix operand
2279  , typename VT2 // Type of the right-hand side vector operand
2280  , typename ST2 > // Type of the scalar value
2281  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2282  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2283  {
2284  selectDefaultAssignKernel( y, A, x, scalar );
2285  }
2286  //**********************************************************************************************
2287 
2288  //**BLAS-based assignment to dense vectors (single precision)***********************************
2289 #if BLAZE_BLAS_MODE
2290 
2303  template< typename VT1 // Type of the left-hand side target vector
2304  , typename MT1 // Type of the left-hand side matrix operand
2305  , typename VT2 // Type of the right-hand side vector operand
2306  , typename ST2 > // Type of the scalar value
2307  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2308  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2309  {
2310  using boost::numeric_cast;
2311 
2315 
2316  const int M ( numeric_cast<int>( A.rows() ) );
2317  const int N ( numeric_cast<int>( A.columns() ) );
2318  const int lda( numeric_cast<int>( A.spacing() ) );
2319 
2320  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2321  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2322  }
2323 #endif
2324  //**********************************************************************************************
2325 
2326  //**BLAS-based assignment to dense vectors (double precision)***********************************
2327 #if BLAZE_BLAS_MODE
2328 
2341  template< typename VT1 // Type of the left-hand side target vector
2342  , typename MT1 // Type of the left-hand side matrix operand
2343  , typename VT2 // Type of the right-hand side vector operand
2344  , typename ST2 > // Type of the scalar value
2345  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2346  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2347  {
2348  using boost::numeric_cast;
2349 
2353 
2354  const int M ( numeric_cast<int>( A.rows() ) );
2355  const int N ( numeric_cast<int>( A.columns() ) );
2356  const int lda( numeric_cast<int>( A.spacing() ) );
2357 
2358  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2359  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2360  }
2361 #endif
2362  //**********************************************************************************************
2363 
2364  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2365 #if BLAZE_BLAS_MODE
2366 
2379  template< typename VT1 // Type of the left-hand side target vector
2380  , typename MT1 // Type of the left-hand side matrix operand
2381  , typename VT2 // Type of the right-hand side vector operand
2382  , typename ST2 > // Type of the scalar value
2383  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2384  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2385  {
2386  using boost::numeric_cast;
2387 
2391  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2392  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2393  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2394 
2395  const int M ( numeric_cast<int>( A.rows() ) );
2396  const int N ( numeric_cast<int>( A.columns() ) );
2397  const int lda( numeric_cast<int>( A.spacing() ) );
2398  const complex<float> alpha( scalar );
2399  const complex<float> beta ( 0.0F, 0.0F );
2400 
2401  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2402  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2403  }
2404 #endif
2405  //**********************************************************************************************
2406 
2407  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2408 #if BLAZE_BLAS_MODE
2409 
2422  template< typename VT1 // Type of the left-hand side target vector
2423  , typename MT1 // Type of the left-hand side matrix operand
2424  , typename VT2 // Type of the right-hand side vector operand
2425  , typename ST2 > // Type of the scalar value
2426  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2427  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2428  {
2429  using boost::numeric_cast;
2430 
2434  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2435  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2436  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2437 
2438  const int M ( numeric_cast<int>( A.rows() ) );
2439  const int N ( numeric_cast<int>( A.columns() ) );
2440  const int lda( numeric_cast<int>( A.spacing() ) );
2441  const complex<double> alpha( scalar );
2442  const complex<double> beta ( 0.0, 0.0 );
2443 
2444  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2445  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2446  }
2447 #endif
2448  //**********************************************************************************************
2449 
2450  //**Assignment to sparse vectors****************************************************************
2462  template< typename VT1 > // Type of the target sparse vector
2463  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2464  {
2466 
2470 
2471  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2472 
2473  const ResultType tmp( rhs );
2474  smpAssign( ~lhs, tmp );
2475  }
2476  //**********************************************************************************************
2477 
2478  //**Addition assignment to dense vectors********************************************************
2490  template< typename VT1 > // Type of the target dense vector
2491  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2492  {
2494 
2495  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2496 
2497  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2498  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2499 
2500  if( left.rows() == 0UL || left.columns() == 0UL ) {
2501  return;
2502  }
2503 
2504  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2505  RT x( right ); // Evaluation of the right-hand side dense vector operand
2506 
2507  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2508  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2509  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2510  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2511 
2512  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2513  }
2514  //**********************************************************************************************
2515 
2516  //**Addition assignment to dense vectors (kernel selection)*************************************
2527  template< typename VT1 // Type of the left-hand side target vector
2528  , typename MT1 // Type of the left-hand side matrix operand
2529  , typename VT2 // Type of the right-hand side vector operand
2530  , typename ST2 > // Type of the scalar value
2531  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2532  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2533  {
2534  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2535  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2536  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2537  else
2538  DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
2539  }
2540  //**********************************************************************************************
2541 
2542  //**Addition assignment to dense vectors (kernel selection)*************************************
2553  template< typename VT1 // Type of the left-hand side target vector
2554  , typename MT1 // Type of the left-hand side matrix operand
2555  , typename VT2 // Type of the right-hand side vector operand
2556  , typename ST2 > // Type of the scalar value
2557  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2558  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2559  {
2560  smpAddAssign( y, A * x * scalar );
2561  }
2562  //**********************************************************************************************
2563 
2564  //**Default addition assignment to dense vectors************************************************
2578  template< typename VT1 // Type of the left-hand side target vector
2579  , typename MT1 // Type of the left-hand side matrix operand
2580  , typename VT2 // Type of the right-hand side vector operand
2581  , typename ST2 > // Type of the scalar value
2582  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2583  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2584  {
2585  y.addAssign( A * x * scalar );
2586  }
2587  //**********************************************************************************************
2588 
2589  //**Vectorized default addition assignment to dense vectors*************************************
2603  template< typename VT1 // Type of the left-hand side target vector
2604  , typename MT1 // Type of the left-hand side matrix operand
2605  , typename VT2 // Type of the right-hand side vector operand
2606  , typename ST2 > // Type of the scalar value
2607  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2608  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2609  {
2610  typedef IntrinsicTrait<ElementType> IT;
2611 
2612  const size_t M( A.rows() );
2613  const size_t N( A.columns() );
2614 
2615  const IntrinsicType factor( set( scalar ) );
2616 
2617  size_t i( 0UL );
2618 
2619  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2620  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2621  for( size_t j=0UL; j<N; ++j ) {
2622  const IntrinsicType x1( set( x[j] ) );
2623  xmm1 = xmm1 + A.load(i ,j) * x1;
2624  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2625  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2626  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2627  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2628  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2629  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2630  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2631  }
2632  y.store( i , y.load(i ) + xmm1*factor );
2633  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2634  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2635  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2636  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
2637  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
2638  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
2639  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
2640  }
2641  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2642  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2643  for( size_t j=0UL; j<N; ++j ) {
2644  const IntrinsicType x1( set( x[j] ) );
2645  xmm1 = xmm1 + A.load(i ,j) * x1;
2646  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2647  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2648  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2649  }
2650  y.store( i , y.load(i ) + xmm1*factor );
2651  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2652  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2653  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2654  }
2655  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2656  IntrinsicType xmm1, xmm2, xmm3;
2657  for( size_t j=0UL; j<N; ++j ) {
2658  const IntrinsicType x1( set( x[j] ) );
2659  xmm1 = xmm1 + A.load(i ,j) * x1;
2660  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2661  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2662  }
2663  y.store( i , y.load(i ) + xmm1*factor );
2664  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2665  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2666  }
2667  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2668  IntrinsicType xmm1, xmm2;
2669  for( size_t j=0UL; j<N; ++j ) {
2670  const IntrinsicType x1( set( x[j] ) );
2671  xmm1 = xmm1 + A.load(i ,j) * x1;
2672  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2673  }
2674  y.store( i , y.load(i ) + xmm1*factor );
2675  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
2676  }
2677  if( i < M ) {
2678  IntrinsicType xmm1;
2679  for( size_t j=0UL; j<N; ++j ) {
2680  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2681  }
2682  y.store( i, y.load(i) + xmm1*factor );
2683  }
2684  }
2685  //**********************************************************************************************
2686 
2687  //**BLAS-based addition assignment to dense vectors (default)***********************************
2701  template< typename VT1 // Type of the left-hand side target vector
2702  , typename MT1 // Type of the left-hand side matrix operand
2703  , typename VT2 // Type of the right-hand side vector operand
2704  , typename ST2 > // Type of the scalar value
2705  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2706  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2707  {
2708  selectDefaultAddAssignKernel( y, A, x, scalar );
2709  }
2710  //**********************************************************************************************
2711 
2712  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2713 #if BLAZE_BLAS_MODE
2714 
2727  template< typename VT1 // Type of the left-hand side target vector
2728  , typename MT1 // Type of the left-hand side matrix operand
2729  , typename VT2 // Type of the right-hand side vector operand
2730  , typename ST2 > // Type of the scalar value
2731  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2732  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2733  {
2734  using boost::numeric_cast;
2735 
2739 
2740  const int M ( numeric_cast<int>( A.rows() ) );
2741  const int N ( numeric_cast<int>( A.columns() ) );
2742  const int lda( numeric_cast<int>( A.spacing() ) );
2743 
2744  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2745  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2746  }
2747 #endif
2748  //**********************************************************************************************
2749 
2750  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2751 #if BLAZE_BLAS_MODE
2752 
2765  template< typename VT1 // Type of the left-hand side target vector
2766  , typename MT1 // Type of the left-hand side matrix operand
2767  , typename VT2 // Type of the right-hand side vector operand
2768  , typename ST2 > // Type of the scalar value
2769  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2770  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2771  {
2772  using boost::numeric_cast;
2773 
2777 
2778  const int M ( numeric_cast<int>( A.rows() ) );
2779  const int N ( numeric_cast<int>( A.columns() ) );
2780  const int lda( numeric_cast<int>( A.spacing() ) );
2781 
2782  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2783  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2784  }
2785 #endif
2786  //**********************************************************************************************
2787 
2788  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2789 #if BLAZE_BLAS_MODE
2790 
2803  template< typename VT1 // Type of the left-hand side target vector
2804  , typename MT1 // Type of the left-hand side matrix operand
2805  , typename VT2 // Type of the right-hand side vector operand
2806  , typename ST2 > // Type of the scalar value
2807  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2808  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2809  {
2810  using boost::numeric_cast;
2811 
2815  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2816  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2817  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2818 
2819  const int M ( numeric_cast<int>( A.rows() ) );
2820  const int N ( numeric_cast<int>( A.columns() ) );
2821  const int lda( numeric_cast<int>( A.spacing() ) );
2822  const complex<float> alpha( scalar );
2823  const complex<float> beta ( 1.0F, 0.0F );
2824 
2825  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2826  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2827  }
2828 #endif
2829  //**********************************************************************************************
2830 
2831  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2832 #if BLAZE_BLAS_MODE
2833 
2846  template< typename VT1 // Type of the left-hand side target vector
2847  , typename MT1 // Type of the left-hand side matrix operand
2848  , typename VT2 // Type of the right-hand side vector operand
2849  , typename ST2 > // Type of the scalar value
2850  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2851  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2852  {
2853  using boost::numeric_cast;
2854 
2858  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2859  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2860  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2861 
2862  const int M ( numeric_cast<int>( A.rows() ) );
2863  const int N ( numeric_cast<int>( A.columns() ) );
2864  const int lda( numeric_cast<int>( A.spacing() ) );
2865  const complex<double> alpha( scalar );
2866  const complex<double> beta ( 1.0, 0.0 );
2867 
2868  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2869  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2870  }
2871 #endif
2872  //**********************************************************************************************
2873 
2874  //**Addition assignment to sparse vectors*******************************************************
2875  // No special implementation for the addition assignment to sparse vectors.
2876  //**********************************************************************************************
2877 
2878  //**Subtraction assignment to dense vectors*****************************************************
2890  template< typename VT1 > // Type of the target dense vector
2891  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2892  {
2894 
2895  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2896 
2897  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2898  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2899 
2900  if( left.rows() == 0UL || left.columns() == 0UL ) {
2901  return;
2902  }
2903 
2904  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2905  RT x( right ); // Evaluation of the right-hand side dense vector operand
2906 
2907  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2908  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2909  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2910  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2911 
2912  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2913  }
2914  //**********************************************************************************************
2915 
2916  //**Subtraction assignment to dense vectors (kernel selection)**********************************
2927  template< typename VT1 // Type of the left-hand side target vector
2928  , typename MT1 // Type of the left-hand side matrix operand
2929  , typename VT2 // Type of the right-hand side vector operand
2930  , typename ST2 > // Type of the scalar value
2931  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2932  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2933  {
2934  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2935  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2936  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
2937  else
2938  DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
2939  }
2940  //**********************************************************************************************
2941 
2942  //**Subtraction assignment to dense vectors (kernel selection)**********************************
2953  template< typename VT1 // Type of the left-hand side target vector
2954  , typename MT1 // Type of the left-hand side matrix operand
2955  , typename VT2 // Type of the right-hand side vector operand
2956  , typename ST2 > // Type of the scalar value
2957  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2958  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2959  {
2960  smpSubAssign( y, A * x * scalar );
2961  }
2962  //**********************************************************************************************
2963 
2964  //**Default subtraction assignment to dense vectors*********************************************
2978  template< typename VT1 // Type of the left-hand side target vector
2979  , typename MT1 // Type of the left-hand side matrix operand
2980  , typename VT2 // Type of the right-hand side vector operand
2981  , typename ST2 > // Type of the scalar value
2982  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2983  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2984  {
2985  y.subAssign( A * x * scalar );
2986  }
2987  //**********************************************************************************************
2988 
2989  //**Vectorized default subtraction assignment to dense vectors**********************************
3003  template< typename VT1 // Type of the left-hand side target vector
3004  , typename MT1 // Type of the left-hand side matrix operand
3005  , typename VT2 // Type of the right-hand side vector operand
3006  , typename ST2 > // Type of the scalar value
3007  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3008  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3009  {
3010  typedef IntrinsicTrait<ElementType> IT;
3011 
3012  const size_t M( A.rows() );
3013  const size_t N( A.columns() );
3014 
3015  const IntrinsicType factor( set( scalar ) );
3016 
3017  size_t i( 0UL );
3018 
3019  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3020  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3021  for( size_t j=0UL; j<N; ++j ) {
3022  const IntrinsicType x1( set( x[j] ) );
3023  xmm1 = xmm1 + A.load(i ,j) * x1;
3024  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3025  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3026  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3027  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3028  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3029  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3030  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3031  }
3032  y.store( i , y.load(i ) - xmm1*factor );
3033  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3034  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3035  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3036  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
3037  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
3038  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
3039  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
3040  }
3041  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3042  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3043  for( size_t j=0UL; j<N; ++j ) {
3044  const IntrinsicType x1( set( x[j] ) );
3045  xmm1 = xmm1 + A.load(i ,j) * x1;
3046  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3047  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3048  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3049  }
3050  y.store( i , y.load(i ) - xmm1*factor );
3051  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3052  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3053  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3054  }
3055  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
3056  IntrinsicType xmm1, xmm2, xmm3;
3057  for( size_t j=0UL; j<N; ++j ) {
3058  const IntrinsicType x1( set( x[j] ) );
3059  xmm1 = xmm1 + A.load(i ,j) * x1;
3060  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3061  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3062  }
3063  y.store( i , y.load(i ) - xmm1*factor );
3064  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3065  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3066  }
3067  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3068  IntrinsicType xmm1, xmm2;
3069  for( size_t j=0UL; j<N; ++j ) {
3070  const IntrinsicType x1( set( x[j] ) );
3071  xmm1 = xmm1 + A.load(i ,j) * x1;
3072  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3073  }
3074  y.store( i , y.load(i ) - xmm1*factor );
3075  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
3076  }
3077  if( i < M ) {
3078  IntrinsicType xmm1;
3079  for( size_t j=0UL; j<N; ++j ) {
3080  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3081  }
3082  y.store( i, y.load(i) - xmm1*factor );
3083  }
3084  }
3085  //**********************************************************************************************
3086 
3087  //**BLAS-based subtraction assignment to dense vectors (default)********************************
3101  template< typename VT1 // Type of the left-hand side target vector
3102  , typename MT1 // Type of the left-hand side matrix operand
3103  , typename VT2 // Type of the right-hand side vector operand
3104  , typename ST2 > // Type of the scalar value
3105  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3106  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3107  {
3108  selectDefaultSubAssignKernel( y, A, x, scalar );
3109  }
3110  //**********************************************************************************************
3111 
3112  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
3113 #if BLAZE_BLAS_MODE
3114 
// Single precision BLAS overload of the subtraction assignment kernel.
// It is selected through EnableIf when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds
// and is only compiled when BLAZE_BLAS_MODE is non-zero (see the surrounding #if).
// The update y -= scalar * A * x is performed by a single cblas_sgemv call.
//   y      : target vector, read and overwritten through y.data()
//   A      : matrix operand, accessed through A.data() with leading dimension A.spacing()
//   x      : vector operand, accessed through x.data()
//   scalar : scaling factor; its negation is passed as the gemv alpha
3127  template< typename VT1 // Type of the left-hand side target vector
3128  , typename MT1 // Type of the left-hand side matrix operand
3129  , typename VT2 // Type of the right-hand side vector operand
3130  , typename ST2 > // Type of the scalar value
3131  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3132  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3133  {
3134  using boost::numeric_cast;
3135 
3139 
      // CBLAS takes its dimensions as int; numeric_cast performs a range-checked
      // conversion instead of silently truncating the size values.
3140  const int M ( numeric_cast<int>( A.rows() ) );
3141  const int N ( numeric_cast<int>( A.columns() ) );
3142  const int lda( numeric_cast<int>( A.spacing() ) );
3143 
      // gemv computes y := alpha*A*x + beta*y. With alpha = -scalar and beta = 1 this is
      // y := y - scalar*A*x. Both vectors are traversed with unit stride.
3144  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
3145  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3146  }
3147 #endif
3148  //**********************************************************************************************
3149 
3150  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
3151 #if BLAZE_BLAS_MODE
3152 
// Double precision BLAS overload of the subtraction assignment kernel.
// It is selected through EnableIf when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds
// and is only compiled when BLAZE_BLAS_MODE is non-zero (see the surrounding #if).
// The update y -= scalar * A * x is performed by a single cblas_dgemv call.
//   y      : target vector, read and overwritten through y.data()
//   A      : matrix operand, accessed through A.data() with leading dimension A.spacing()
//   x      : vector operand, accessed through x.data()
//   scalar : scaling factor; its negation is passed as the gemv alpha
3165  template< typename VT1 // Type of the left-hand side target vector
3166  , typename MT1 // Type of the left-hand side matrix operand
3167  , typename VT2 // Type of the right-hand side vector operand
3168  , typename ST2 > // Type of the scalar value
3169  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3170  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3171  {
3172  using boost::numeric_cast;
3173 
3177 
      // CBLAS takes its dimensions as int; numeric_cast performs a range-checked
      // conversion instead of silently truncating the size values.
3178  const int M ( numeric_cast<int>( A.rows() ) );
3179  const int N ( numeric_cast<int>( A.columns() ) );
3180  const int lda( numeric_cast<int>( A.spacing() ) );
3181 
      // gemv computes y := alpha*A*x + beta*y. With alpha = -scalar and beta = 1 this is
      // y := y - scalar*A*x. Both vectors are traversed with unit stride.
3182  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
3183  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3184  }
3185 #endif
3186  //**********************************************************************************************
3187 
3188  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
3189 #if BLAZE_BLAS_MODE
3190 
// Single precision complex BLAS overload of the subtraction assignment kernel.
// It is selected through EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition) and is only compiled when
// BLAZE_BLAS_MODE is non-zero (see the surrounding #if).
// The update y -= scalar * A * x is performed by a single cblas_cgemv call.
//   y      : target vector, read and overwritten through y.data()
//   A      : matrix operand, accessed through A.data() with leading dimension A.spacing()
//   x      : vector operand, accessed through x.data()
//   scalar : scaling factor; complex<float>( -scalar ) is used as the gemv alpha
3203  template< typename VT1 // Type of the left-hand side target vector
3204  , typename MT1 // Type of the left-hand side matrix operand
3205  , typename VT2 // Type of the right-hand side vector operand
3206  , typename ST2 > // Type of the scalar value
3207  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3208  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3209  {
3210  using boost::numeric_cast;
3211 
      // Compile time checks: the value_type of every operand's element type must be float.
3215  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3216  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3217  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3218 
      // CBLAS takes its dimensions as int; numeric_cast performs a range-checked
      // conversion instead of silently truncating the size values.
3219  const int M ( numeric_cast<int>( A.rows() ) );
3220  const int N ( numeric_cast<int>( A.columns() ) );
3221  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines receive alpha and beta by address, hence the named locals.
      // alpha = -scalar and beta = (1,0) turn y := alpha*A*x + beta*y into y := y - scalar*A*x.
3222  const complex<float> alpha( -scalar );
3223  const complex<float> beta ( 1.0F, 0.0F );
3224 
3225  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
3226  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3227  }
3228 #endif
3229  //**********************************************************************************************
3230 
3231  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
3232 #if BLAZE_BLAS_MODE
3233 
// Double precision complex BLAS overload of the subtraction assignment kernel.
// It is selected through EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds
// (the scalar type ST2 is not part of that condition) and is only compiled when
// BLAZE_BLAS_MODE is non-zero (see the surrounding #if).
// The update y -= scalar * A * x is performed by a single cblas_zgemv call.
//   y      : target vector, read and overwritten through y.data()
//   A      : matrix operand, accessed through A.data() with leading dimension A.spacing()
//   x      : vector operand, accessed through x.data()
//   scalar : scaling factor; complex<double>( -scalar ) is used as the gemv alpha
3246  template< typename VT1 // Type of the left-hand side target vector
3247  , typename MT1 // Type of the left-hand side matrix operand
3248  , typename VT2 // Type of the right-hand side vector operand
3249  , typename ST2 > // Type of the scalar value
3250  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3251  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3252  {
3253  using boost::numeric_cast;
3254 
      // Compile time checks: the value_type of every operand's element type must be double.
3258  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3259  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3260  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3261 
      // CBLAS takes its dimensions as int; numeric_cast performs a range-checked
      // conversion instead of silently truncating the size values.
3262  const int M ( numeric_cast<int>( A.rows() ) );
3263  const int N ( numeric_cast<int>( A.columns() ) );
3264  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines receive alpha and beta by address, hence the named locals.
      // alpha = -scalar and beta = (1,0) turn y := alpha*A*x + beta*y into y := y - scalar*A*x.
3265  const complex<double> alpha( -scalar );
3266  const complex<double> beta ( 1.0, 0.0 );
3267 
3268  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
3269  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3270  }
3271 #endif
3272  //**********************************************************************************************
3273 
3274  //**Subtraction assignment to sparse vectors****************************************************
3275  // No special implementation for the subtraction assignment to sparse vectors.
3276  //**********************************************************************************************
3277 
3278  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a DVecScalarMultExpr to a dense vector.
// The expression is not multiplied into the target directly: it is first evaluated into
// a temporary of type ResultType, and that temporary is then passed to multAssign().
//   lhs : target dense vector (accessed through operator~)
//   rhs : right-hand side expression; its size must equal the size of lhs
3290  template< typename VT1 > // Type of the target dense vector
3291  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3292  {
3294 
3298 
      // Internal sanity check only; BLAZE_INTERNAL_ASSERT can be compiled out.
3299  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3300 
      // Evaluate the complete expression once, then multiply-assign the plain result.
3301  const ResultType tmp( rhs );
3302  multAssign( ~lhs, tmp );
3303  }
3304  //**********************************************************************************************
3305 
3306  //**Multiplication assignment to sparse vectors*************************************************
3307  // No special implementation for the multiplication assignment to sparse vectors.
3308  //**********************************************************************************************
3309 
3310  //**Compile time checks*************************************************************************
3319  //**********************************************************************************************
3320 };
3322 //*************************************************************************************************
3323 
3324 
3325 
3326 
3327 //=================================================================================================
3328 //
3329 // GLOBAL BINARY ARITHMETIC OPERATORS
3330 //
3331 //=================================================================================================
3332 
3333 //*************************************************************************************************
// Creates a TDMatDVecMultExpr from a dense matrix operand and a dense vector operand.
// The function is removed from overload resolution via DisableIf when
// IsMatMatMultExpr<T1> holds for the matrix type.
// Throws std::invalid_argument if the number of matrix columns differs from the
// vector size; otherwise the returned expression object is built from ~mat and ~vec.
// NOTE(review): the declarator line (listing line 3367, which names the function and
// its parameters mat and vec) is missing from this listing; given the section heading
// this is presumably the global operator* -- confirm against the original header.
3364 template< typename T1 // Type of the left-hand side dense matrix
3365  , typename T2 > // Type of the right-hand side dense vector
3366 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
3368 {
3370 
     // The size check is performed here so that a mismatch is reported with an exception.
3371  if( (~mat).columns() != (~vec).size() )
3372  throw std::invalid_argument( "Matrix and vector sizes do not match" );
3373 
3374  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3375 }
3376 //*************************************************************************************************
3377 
3378 
3379 
3380 
3381 //=================================================================================================
3382 //
3383 // EXPRESSION TRAIT SPECIALIZATIONS
3384 //
3385 //=================================================================================================
3386 
3387 //*************************************************************************************************
// Specialization of SubvectorExprTrait for TDMatDVecMultExpr<MT,VT>.
// The nested Type is obtained by first applying SubmatrixExprTrait (with the same AF
// flag) to the const matrix operand type and then applying MultExprTrait to that
// result and the unchanged vector type VT.
3389 template< typename MT, typename VT, bool AF >
3390 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT>, AF >
3391 {
3392  public:
3393  //**********************************************************************************************
3394  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
3395  //**********************************************************************************************
3396 };
3398 //*************************************************************************************************
3399 
3400 } // namespace blaze
3401 
3402 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4075
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:257
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:112
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:251
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:280
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
Header file for the DenseVector base class.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:244
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:396
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:122
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:251
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:254
Header file for the multiplication trait.
Header file for the dense vector SMP implementation.
Header file for the IsDouble type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:397
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:111
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:332
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDMatDVecMultExpr.h:398
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:247
Constraints on the storage order of matrix types.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:248
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:260
Header file for the EnableIf class template.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:322
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:366
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:113
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:243
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:246
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:354
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:114
Header file for all intrinsic functionality.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:245
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:295
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:242
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Header file for the sparse vector SMP implementation.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
Header file for basic type definitions.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:116
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:386
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:376
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:342
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:115
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold. This threshold represents the sys...
Definition: Thresholds.h:152