All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
55 #include <blaze/math/Intrinsics.h>
56 #include <blaze/math/shims/Reset.h>
69 #include <blaze/system/BLAS.h>
71 #include <blaze/util/Assert.h>
72 #include <blaze/util/Complex.h>
78 #include <blaze/util/DisableIf.h>
79 #include <blaze/util/EnableIf.h>
81 #include <blaze/util/SelectType.h>
82 #include <blaze/util/Types.h>
88 
89 
90 namespace blaze {
91 
92 //=================================================================================================
93 //
94 // CLASS TDMATDVECMULTEXPR
95 //
96 //=================================================================================================
97 
98 //*************************************************************************************************
105 template< typename MT // Type of the left-hand side dense matrix
106  , typename VT > // Type of the right-hand side dense vector
107 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
108  , private MatVecMultExpr
109  , private Computation
110 {
111  private:
112  //**Type definitions****************************************************************************
// Shorthand aliases for the nested types of the matrix operand type MT and the vector
// operand type VT.
113  typedef typename MT::ResultType MRT;  // Result type of the matrix operand
114  typedef typename VT::ResultType VRT;  // Result type of the vector operand
115  typedef typename MRT::ElementType MET;  // Element type of the matrix result type
116  typedef typename VRT::ElementType VET;  // Element type of the vector result type
117  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand
118  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand
119  //**********************************************************************************************
120 
121  //**********************************************************************************************
// Compile-time flag for the matrix operand. The visible part of the condition combines
// IsComputation<MT> with IsSame<MET,VET> (matrix and vector element type).
// NOTE(review): the expression continues on a line (listing line 124) that is missing here,
// so the complete condition cannot be read from this listing.
123  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
125  //**********************************************************************************************
126 
127  //**********************************************************************************************
// Compile-time flag for the vector operand: set when IsComputation<VT> or
// RequiresEvaluation<VT> reports true.
129  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
130  //**********************************************************************************************
131 
132  //**********************************************************************************************
134 
138  template< typename T1 >
139  struct UseSMPAssign {
140  enum { value = ( evaluateMatrix || evaluateVector ) };
141  };
143  //**********************************************************************************************
144 
145  //**********************************************************************************************
147 
// Compile-time predicate over three operand types T1, T2 and T3. The visible part of the
// condition requires BLAZE_BLAS_MODE to be enabled and all three types to be vectorizable.
// NOTE(review): the remaining conditions (listing lines 155-157) are missing here; judging
// by the name they presumably restrict the element types to float - confirm against the
// original header.
151  template< typename T1, typename T2, typename T3 >
152  struct UseSinglePrecisionKernel {
153  enum { value = BLAZE_BLAS_MODE &&
154  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
158  };
160  //**********************************************************************************************
161 
162  //**********************************************************************************************
164 
// Compile-time predicate over three operand types T1, T2 and T3. The visible part of the
// condition requires BLAZE_BLAS_MODE to be enabled and all three types to be vectorizable.
// NOTE(review): the remaining conditions (listing lines 172-174) are missing here; judging
// by the name they presumably restrict the element types to double - confirm against the
// original header.
168  template< typename T1, typename T2, typename T3 >
169  struct UseDoublePrecisionKernel {
170  enum { value = BLAZE_BLAS_MODE &&
171  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
175  };
177  //**********************************************************************************************
178 
179  //**********************************************************************************************
181 
// Compile-time predicate over three operand types T1, T2 and T3. The nested Type is
// complex<float>. The visible part of the condition requires BLAZE_BLAS_MODE to be enabled
// and all three types to be vectorizable.
// NOTE(review): the remaining conditions (listing lines 190-192) are missing here; they
// presumably compare the element types against Type - confirm against the original header.
185  template< typename T1, typename T2, typename T3 >
186  struct UseSinglePrecisionComplexKernel {
187  typedef complex<float> Type;
188  enum { value = BLAZE_BLAS_MODE &&
189  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
193  };
195  //**********************************************************************************************
196 
197  //**********************************************************************************************
199 
// Compile-time predicate over three operand types T1, T2 and T3. The nested Type is
// complex<double>. The visible part of the condition requires BLAZE_BLAS_MODE to be enabled
// and all three types to be vectorizable.
// NOTE(review): the remaining conditions (listing lines 208-210) are missing here; they
// presumably compare the element types against Type - confirm against the original header.
203  template< typename T1, typename T2, typename T3 >
204  struct UseDoublePrecisionComplexKernel {
205  typedef complex<double> Type;
206  enum { value = BLAZE_BLAS_MODE &&
207  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
211  };
213  //**********************************************************************************************
214 
215  //**********************************************************************************************
217 
220  template< typename T1, typename T2, typename T3 >
221  struct UseDefaultKernel {
222  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
223  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
224  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
225  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
226  };
228  //**********************************************************************************************
229 
230  //**********************************************************************************************
232 
// Compile-time predicate over three operand types T1, T2 and T3; it is used via
// EnableIf/DisableIf to choose between the scalar and the vectorized default kernels.
// The visible part of the condition requires all three types to be vectorizable.
// NOTE(review): the remaining conditions (listing lines 239-242) are missing here.
236  template< typename T1, typename T2, typename T3 >
237  struct UseVectorizedDefaultKernel {
238  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
243  };
245  //**********************************************************************************************
246 
247  public:
248  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are used below but declared on lines that are
// missing from this listing (listing lines 249-253).
// ReturnType is the type returned by the subscript operator of this class.
254  typedef const ElementType ReturnType;
255  typedef const ResultType CompositeType;
256 
// Type of the stored matrix operand; chosen via SelectType on IsExpression<MT> between
// 'const MT' and 'const MT&' (presumably by value for expressions - confirm SelectType order).
258  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
259 
// Type of the stored vector operand; chosen the same way between 'const VT' and 'const VT&'.
261  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
262 
265 
268  //**********************************************************************************************
269 
270  //**Compilation flags***************************************************************************
// Compilation flag 'vectorizable'. The visible part of the condition requires both operand
// types to be vectorizable.
// NOTE(review): the expression continues on lines (listing lines 273-275) that are missing here.
272  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
276 
278  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
279  !evaluateVector && VT::smpAssignable };
280  //**********************************************************************************************
281 
282  //**Constructor*********************************************************************************
288  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
289  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
290  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
291  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
292  {
293  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
294  }
295  //**********************************************************************************************
296 
297  //**Subscript operator**************************************************************************
303  inline ReturnType operator[]( size_t index ) const {
304  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
305 
306  ElementType res;
307 
308  if( mat_.columns() != 0UL ) {
309  res = mat_(index,0UL) * vec_[0UL];
310  for( size_t j=1UL; j<end_; j+=2UL ) {
311  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
312  }
313  if( end_ < mat_.columns() ) {
314  res += mat_(index,end_) * vec_[end_];
315  }
316  }
317  else {
318  reset( res );
319  }
320 
321  return res;
322  }
323  //**********************************************************************************************
324 
325  //**Size function*******************************************************************************
330  inline size_t size() const {
331  return mat_.rows();
332  }
333  //**********************************************************************************************
334 
335  //**Left operand access*************************************************************************
340  inline LeftOperand leftOperand() const {
341  return mat_;
342  }
343  //**********************************************************************************************
344 
345  //**Right operand access************************************************************************
350  inline RightOperand rightOperand() const {
351  return vec_;
352  }
353  //**********************************************************************************************
354 
355  //**********************************************************************************************
361  template< typename T >
362  inline bool canAlias( const T* alias ) const {
363  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
364  }
365  //**********************************************************************************************
366 
367  //**********************************************************************************************
373  template< typename T >
374  inline bool isAliased( const T* alias ) const {
375  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
376  }
377  //**********************************************************************************************
378 
379  //**********************************************************************************************
384  inline bool isAligned() const {
385  return mat_.isAligned() && vec_.isAligned();
386  }
387  //**********************************************************************************************
388 
389  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// The visible part of the condition holds if BLAS is not parallel, or the matrix operand is a
// computation that is not flagged for evaluation, or the matrix has fewer elements than
// TDMATDVECMULT_THRESHOLD.
// NOTE(review): the trailing '&&' shows that the return expression continues on a line
// (listing line 398) that is missing here, so the full condition cannot be read.
394  inline bool canSMPAssign() const {
395  return ( !BLAZE_BLAS_IS_PARALLEL ||
396  ( IsComputation<MT>::value && !evaluateMatrix ) ||
397  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
399  }
400  //**********************************************************************************************
401 
402  private:
403  //**Member variables****************************************************************************
// Upper bound of the two-at-a-time loop in the subscript operator; computed once in the
// constructor from the number of matrix columns.
// NOTE(review): the declarations of mat_ and vec_ (listing lines 404-405) are missing from
// this listing.
406  const size_t end_;
407  //**********************************************************************************************
408 
409  //**BLAS kernel (single precision)**************************************************************
410 #if BLAZE_BLAS_MODE
411 
// BLAS wrapper for single-precision operands: forwards to cblas_sgemv with column-major
// storage order and without transposition (the BLAS operation y = alpha*A*x + beta*y).
//   y      target vector, accessed through y.data()
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data()
//   alpha  scaling factor passed to cblas_sgemv for the matrix-vector product
//   beta   scaling factor passed to cblas_sgemv for y
// NOTE(review): listing lines 433-435 of the function body are missing from this listing.
426  template< typename VT1 // Type of the left-hand side target vector
427  , typename MT1 // Type of the left-hand side matrix operand
428  , typename VT2 > // Type of the right-hand side vector operand
429  static inline void sgemv( VT1& y, const MT1& A, const VT2& x, float alpha, float beta )
430  {
431  using boost::numeric_cast;
432 
436 
// The dimensions are converted to the int arguments of the CBLAS interface via numeric_cast
437  const int M ( numeric_cast<int>( A.rows() ) );
438  const int N ( numeric_cast<int>( A.columns() ) );
439  const int lda( numeric_cast<int>( A.spacing() ) );
440 
// Both vectors are passed with an increment of 1
441  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
442  A.data(), lda, x.data(), 1, beta, y.data(), 1 );
443  }
445 #endif
446  //**********************************************************************************************
447 
448  //**BLAS kernel (double precision)**************************************************************
449 #if BLAZE_BLAS_MODE
450 
// BLAS wrapper for double-precision operands: forwards to cblas_dgemv with column-major
// storage order and without transposition (the BLAS operation y = alpha*A*x + beta*y).
//   y      target vector, accessed through y.data()
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data()
//   alpha  scaling factor passed to cblas_dgemv for the matrix-vector product
//   beta   scaling factor passed to cblas_dgemv for y
// NOTE(review): listing lines 472-474 of the function body are missing from this listing.
465  template< typename VT1 // Type of the left-hand side target vector
466  , typename MT1 // Type of the left-hand side matrix operand
467  , typename VT2 > // Type of the right-hand side vector operand
468  static inline void dgemv( VT1& y, const MT1& A, const VT2& x, double alpha, double beta )
469  {
470  using boost::numeric_cast;
471 
475 
// The dimensions are converted to the int arguments of the CBLAS interface via numeric_cast
476  const int M ( numeric_cast<int>( A.rows() ) );
477  const int N ( numeric_cast<int>( A.columns() ) );
478  const int lda( numeric_cast<int>( A.spacing() ) );
479 
// Both vectors are passed with an increment of 1
480  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
481  A.data(), lda, x.data(), 1, beta, y.data(), 1 );
482  }
484 #endif
485  //**********************************************************************************************
486 
487  //**BLAS kernel (single precision complex)******************************************************
488 #if BLAZE_BLAS_MODE
489 
// BLAS wrapper for single-precision complex operands: forwards to cblas_cgemv with
// column-major storage order and without transposition (y = alpha*A*x + beta*y).
//   y      target vector, accessed through y.data()
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data()
//   alpha  complex scaling factor for the matrix-vector product
//   beta   complex scaling factor for y
// NOTE(review): listing lines 512-514 of the function body are missing from this listing.
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename MT1 // Type of the left-hand side matrix operand
506  , typename VT2 > // Type of the right-hand side vector operand
507  static inline void cgemv( VT1& y, const MT1& A, const VT2& x,
508  complex<float> alpha, complex<float> beta )
509  {
510  using boost::numeric_cast;
511 
// Compile-time constraints on the value_type of the element type of all three operands
515  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
516  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
517  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
518 
// The dimensions are converted to the int arguments of the CBLAS interface via numeric_cast
519  const int M ( numeric_cast<int>( A.rows() ) );
520  const int N ( numeric_cast<int>( A.columns() ) );
521  const int lda( numeric_cast<int>( A.spacing() ) );
522 
// The complex scaling factors are handed over by address
523  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
524  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
525  }
527 #endif
528  //**********************************************************************************************
529 
530  //**BLAS kernel (double precision complex)******************************************************
531 #if BLAZE_BLAS_MODE
532 
// BLAS wrapper for double-precision complex operands: forwards to cblas_zgemv with
// column-major storage order and without transposition (y = alpha*A*x + beta*y).
//   y      target vector, accessed through y.data()
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data()
//   alpha  complex scaling factor for the matrix-vector product
//   beta   complex scaling factor for y
// NOTE(review): listing lines 555-557 of the function body are missing from this listing.
547  template< typename VT1 // Type of the left-hand side target vector
548  , typename MT1 // Type of the left-hand side matrix operand
549  , typename VT2 > // Type of the right-hand side vector operand
550  static inline void zgemv( VT1& y, const MT1& A, const VT2& x,
551  complex<double> alpha, complex<double> beta )
552  {
553  using boost::numeric_cast;
554 
// Compile-time constraints on the value_type of the element type of all three operands
558  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
559  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
560  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
561 
// The dimensions are converted to the int arguments of the CBLAS interface via numeric_cast
562  const int M ( numeric_cast<int>( A.rows() ) );
563  const int N ( numeric_cast<int>( A.columns() ) );
564  const int lda( numeric_cast<int>( A.spacing() ) );
565 
// The complex scaling factors are handed over by address
566  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
567  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
568  }
570 #endif
571  //**********************************************************************************************
572 
573  //**Assignment to dense vectors*****************************************************************
// Assignment of the matrix-vector product to a dense vector.
//   lhs  the target dense vector; its size must match the size of the expression
//   rhs  the multiplication expression to be assigned
// A matrix without rows leaves lhs untouched, a matrix without columns resets lhs. Otherwise
// both operands are passed through serial(), bound to LT/RT and handed to selectAssignKernel().
// NOTE(review): LT and RT are declared on lines missing from this listing; listing line 589
// of the function body is missing as well.
586  template< typename VT1 > // Type of the target dense vector
587  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
588  {
590 
591  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
592 
// Degenerate shapes are handled up front so that the kernels can rely on at least one column
593  if( rhs.mat_.rows() == 0UL ) {
594  return;
595  }
596  else if( rhs.mat_.columns() == 0UL ) {
597  reset( ~lhs );
598  return;
599  }
600 
601  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
602  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
603 
604  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
605  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
606  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
607  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
608 
609  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
610  }
612  //**********************************************************************************************
613 
614  //**Assignment to dense vectors (kernel selection)**********************************************
625  template< typename VT1 // Type of the left-hand side target vector
626  , typename MT1 // Type of the left-hand side matrix operand
627  , typename VT2 > // Type of the right-hand side vector operand
628  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
629  {
630  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
631  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
632  TDMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
633  else
634  TDMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
635  }
637  //**********************************************************************************************
638 
639  //**Default assignment to dense vectors*********************************************************
653  template< typename VT1 // Type of the left-hand side target vector
654  , typename MT1 // Type of the left-hand side matrix operand
655  , typename VT2 > // Type of the right-hand side vector operand
656  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
657  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
658  {
659  const size_t M( A.rows() );
660  const size_t N( A.columns() );
661 
662  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
663  const size_t iend( M & size_t(-2) );
664 
665  for( size_t i=0UL; i<M; ++i ) {
666  y[i] = x[0UL] * A(i,0UL);
667  }
668  for( size_t j=1UL; j<N; ++j ) {
669  for( size_t i=0UL; i<iend; i+=2UL ) {
670  y[i ] += x[j] * A(i ,j);
671  y[i+1UL] += x[j] * A(i+1UL,j);
672  }
673  if( iend < M ) {
674  y[iend] += x[j] * A(iend,j);
675  }
676  }
677  }
679  //**********************************************************************************************
680 
681  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the assignment y = A * x; this overload is chosen when
// UseVectorizedDefaultKernel is enabled for the given types.
//   y  target vector, written via y.store()
//   A  matrix operand, read via A.load(row,column)
//   x  vector operand; each x[j] is expanded to an IntrinsicType via set()
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block all columns are traversed and the products of the loaded
// column parts with x[j] are summed up before the sums are stored to y.
// NOTE(review): the accumulators are declared without an initializer, so the result relies
// on the default construction of IntrinsicType yielding zero - confirm.
// NOTE(review): IntrinsicType and ElementType are declared on lines missing from this listing.
695  template< typename VT1 // Type of the left-hand side target vector
696  , typename MT1 // Type of the left-hand side matrix operand
697  , typename VT2 > // Type of the right-hand side vector operand
698  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
699  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
700  {
701  typedef IntrinsicTrait<ElementType> IT;
702 
703  const size_t M( A.rows() );
704  const size_t N( A.columns() );
705 
// Row index shared by all of the following loops; each loop continues where the previous stopped
706  size_t i( 0UL );
707 
// Blocks of 8 intrinsic vectors
708  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
709  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
710  for( size_t j=0UL; j<N; ++j ) {
711  const IntrinsicType x1( set( x[j] ) );
712  xmm1 = xmm1 + A.load(i ,j) * x1;
713  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
714  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
715  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
716  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
717  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
718  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
719  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
720  }
721  y.store( i , xmm1 );
722  y.store( i+IT::size , xmm2 );
723  y.store( i+IT::size*2UL, xmm3 );
724  y.store( i+IT::size*3UL, xmm4 );
725  y.store( i+IT::size*4UL, xmm5 );
726  y.store( i+IT::size*5UL, xmm6 );
727  y.store( i+IT::size*6UL, xmm7 );
728  y.store( i+IT::size*7UL, xmm8 );
729  }
// Blocks of 4 intrinsic vectors
730  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
731  IntrinsicType xmm1, xmm2, xmm3, xmm4;
732  for( size_t j=0UL; j<N; ++j ) {
733  const IntrinsicType x1( set( x[j] ) );
734  xmm1 = xmm1 + A.load(i ,j) * x1;
735  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
736  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
737  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
738  }
739  y.store( i , xmm1 );
740  y.store( i+IT::size , xmm2 );
741  y.store( i+IT::size*2UL, xmm3 );
742  y.store( i+IT::size*3UL, xmm4 );
743  }
// Blocks of 3 intrinsic vectors
744  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
745  IntrinsicType xmm1, xmm2, xmm3;
746  for( size_t j=0UL; j<N; ++j ) {
747  const IntrinsicType x1( set( x[j] ) );
748  xmm1 = xmm1 + A.load(i ,j) * x1;
749  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
750  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
751  }
752  y.store( i , xmm1 );
753  y.store( i+IT::size , xmm2 );
754  y.store( i+IT::size*2UL, xmm3 );
755  }
// Blocks of 2 intrinsic vectors
756  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
757  IntrinsicType xmm1, xmm2;
758  for( size_t j=0UL; j<N; ++j ) {
759  const IntrinsicType x1( set( x[j] ) );
760  xmm1 = xmm1 + A.load(i ,j) * x1;
761  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
762  }
763  y.store( i , xmm1 );
764  y.store( i+IT::size, xmm2 );
765  }
// Final intrinsic vector. NOTE(review): a full-width load/store is used even if fewer than
// IT::size rows remain, which presumably relies on padded storage - confirm.
766  if( i < M ) {
767  IntrinsicType xmm1;
768  for( size_t j=0UL; j<N; ++j ) {
769  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
770  }
771  y.store( i, xmm1 );
772  }
773  }
775  //**********************************************************************************************
776 
777  //**BLAS-based assignment to dense vectors (default)********************************************
791  template< typename VT1 // Type of the left-hand side target vector
792  , typename MT1 // Type of the left-hand side matrix operand
793  , typename VT2 > // Type of the right-hand side vector operand
794  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
795  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
796  {
797  selectDefaultAssignKernel( y, A, x );
798  }
800  //**********************************************************************************************
801 
802  //**BLAS-based assignment to dense vectors (single precision)***********************************
803 #if BLAZE_BLAS_MODE
804 
817  template< typename VT1 // Type of the left-hand side target vector
818  , typename MT1 // Type of the left-hand side matrix operand
819  , typename VT2 > // Type of the right-hand side vector operand
820  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
821  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
822  {
823  sgemv( y, A, x, 1.0F, 0.0F );
824  }
826 #endif
827  //**********************************************************************************************
828 
829  //**BLAS-based assignment to dense vectors (double precision)***********************************
830 #if BLAZE_BLAS_MODE
831 
844  template< typename VT1 // Type of the left-hand side target vector
845  , typename MT1 // Type of the left-hand side matrix operand
846  , typename VT2 > // Type of the right-hand side vector operand
847  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
848  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
849  {
850  dgemv( y, A, x, 1.0, 0.0 );
851  }
853 #endif
854  //**********************************************************************************************
855 
856  //**BLAS-based assignment to dense vectors (single precision complex)***************************
857 #if BLAZE_BLAS_MODE
858 
871  template< typename VT1 // Type of the left-hand side target vector
872  , typename MT1 // Type of the left-hand side matrix operand
873  , typename VT2 > // Type of the right-hand side vector operand
874  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
875  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
876  {
877  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
878  }
880 #endif
881  //**********************************************************************************************
882 
883  //**BLAS-based assignment to dense vectors (double precision complex)***************************
884 #if BLAZE_BLAS_MODE
885 
898  template< typename VT1 // Type of the left-hand side target vector
899  , typename MT1 // Type of the left-hand side matrix operand
900  , typename VT2 > // Type of the right-hand side vector operand
901  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
902  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
903  {
904  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
905  }
907 #endif
908  //**********************************************************************************************
909 
910  //**Assignment to sparse vectors****************************************************************
// Assignment of the matrix-vector product to a sparse vector.
//   lhs  the target sparse vector; its size must match the size of the expression
//   rhs  the multiplication expression to be assigned
// The expression is first evaluated into a temporary of type ResultType (via serial()), and
// that temporary is then assigned to lhs.
// NOTE(review): listing lines 926 and 928-930 of the function body are missing from this
// listing; ResultType is declared on a line missing from this listing as well.
923  template< typename VT1 > // Type of the target sparse vector
924  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
925  {
927 
931 
932  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
933 
934  const ResultType tmp( serial( rhs ) );
935  assign( ~lhs, tmp );
936  }
938  //**********************************************************************************************
939 
940  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the matrix-vector product to a dense vector.
//   lhs  the target dense vector; its size must match the size of the expression
//   rhs  the multiplication expression to be added
// A matrix without rows or without columns leaves lhs untouched. Otherwise both operands are
// passed through serial(), bound to LT/RT and handed to selectAddAssignKernel().
// NOTE(review): LT and RT are declared on lines missing from this listing; listing line 956
// of the function body is missing as well.
953  template< typename VT1 > // Type of the target dense vector
954  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
955  {
957 
958  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
959 
// An empty product contributes nothing to the target vector
960  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
961  return;
962  }
963 
964  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
965  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
966 
967  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
968  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
969  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
970  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
971 
972  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
973  }
975  //**********************************************************************************************
976 
977  //**Addition assignment to dense vectors (kernel selection)*************************************
988  template< typename VT1 // Type of the left-hand side target vector
989  , typename MT1 // Type of the left-hand side matrix operand
990  , typename VT2 > // Type of the right-hand side vector operand
991  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
992  {
993  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
994  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
995  TDMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
996  else
997  TDMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
998  }
1000  //**********************************************************************************************
1001 
1002  //**Default addition assignment to dense vectors************************************************
1016  template< typename VT1 // Type of the left-hand side target vector
1017  , typename MT1 // Type of the left-hand side matrix operand
1018  , typename VT2 > // Type of the right-hand side vector operand
1019  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1020  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1021  {
1022  const size_t M( A.rows() );
1023  const size_t N( A.columns() );
1024 
1025  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1026  const size_t iend( M & size_t(-2) );
1027 
1028  for( size_t j=0UL; j<N; ++j ) {
1029  for( size_t i=0UL; i<iend; i+=2UL ) {
1030  y[i ] += x[j] * A(i ,j);
1031  y[i+1UL] += x[j] * A(i+1UL,j);
1032  }
1033  if( iend < M ) {
1034  y[iend] += x[j] * A(iend,j);
1035  }
1036  }
1037  }
1039  //**********************************************************************************************
1040 
1041  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment y += A * x; this overload is chosen
// when UseVectorizedDefaultKernel is enabled for the given types.
//   y  target vector, read via y.load() and written via y.store()
//   A  matrix operand, read via A.load(row,column)
//   x  vector operand; each x[j] is expanded to an IntrinsicType via set()
// The rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the accumulators start with the current content of y,
// all columns are traversed and the products of the loaded column parts with x[j] are added
// before the sums are stored back to y.
// NOTE(review): IntrinsicType and ElementType are declared on lines missing from this listing.
1055  template< typename VT1 // Type of the left-hand side target vector
1056  , typename MT1 // Type of the left-hand side matrix operand
1057  , typename VT2 > // Type of the right-hand side vector operand
1058  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1059  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1060  {
1061  typedef IntrinsicTrait<ElementType> IT;
1062 
1063  const size_t M( A.rows() );
1064  const size_t N( A.columns() );
1065 
// Row index shared by all of the following loops; each loop continues where the previous stopped
1066  size_t i( 0UL );
1067 
// Blocks of 8 intrinsic vectors
1068  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1069  IntrinsicType xmm1( y.load(i ) );
1070  IntrinsicType xmm2( y.load(i+IT::size ) );
1071  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1072  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1073  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1074  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1075  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1076  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1077  for( size_t j=0UL; j<N; ++j ) {
1078  const IntrinsicType x1( set( x[j] ) );
1079  xmm1 = xmm1 + A.load(i ,j) * x1;
1080  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1081  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1082  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1083  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1084  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1085  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1086  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1087  }
1088  y.store( i , xmm1 );
1089  y.store( i+IT::size , xmm2 );
1090  y.store( i+IT::size*2UL, xmm3 );
1091  y.store( i+IT::size*3UL, xmm4 );
1092  y.store( i+IT::size*4UL, xmm5 );
1093  y.store( i+IT::size*5UL, xmm6 );
1094  y.store( i+IT::size*6UL, xmm7 );
1095  y.store( i+IT::size*7UL, xmm8 );
1096  }
// Blocks of 4 intrinsic vectors
1097  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1098  IntrinsicType xmm1( y.load(i ) );
1099  IntrinsicType xmm2( y.load(i+IT::size ) );
1100  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1101  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1102  for( size_t j=0UL; j<N; ++j ) {
1103  const IntrinsicType x1( set( x[j] ) );
1104  xmm1 = xmm1 + A.load(i ,j) * x1;
1105  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1106  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1107  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1108  }
1109  y.store( i , xmm1 );
1110  y.store( i+IT::size , xmm2 );
1111  y.store( i+IT::size*2UL, xmm3 );
1112  y.store( i+IT::size*3UL, xmm4 );
1113  }
// Blocks of 3 intrinsic vectors
1114  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1115  IntrinsicType xmm1( y.load(i ) );
1116  IntrinsicType xmm2( y.load(i+IT::size ) );
1117  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1118  for( size_t j=0UL; j<N; ++j ) {
1119  const IntrinsicType x1( set( x[j] ) );
1120  xmm1 = xmm1 + A.load(i ,j) * x1;
1121  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1122  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1123  }
1124  y.store( i , xmm1 );
1125  y.store( i+IT::size , xmm2 );
1126  y.store( i+IT::size*2UL, xmm3 );
1127  }
// Blocks of 2 intrinsic vectors
1128  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1129  IntrinsicType xmm1( y.load(i ) );
1130  IntrinsicType xmm2( y.load(i+IT::size) );
1131  for( size_t j=0UL; j<N; ++j ) {
1132  const IntrinsicType x1( set( x[j] ) );
1133  xmm1 = xmm1 + A.load(i ,j) * x1;
1134  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1135  }
1136  y.store( i , xmm1 );
1137  y.store( i+IT::size, xmm2 );
1138  }
// Final intrinsic vector. NOTE(review): a full-width load/store is used even if fewer than
// IT::size rows remain, which presumably relies on padded storage - confirm.
1139  if( i < M ) {
1140  IntrinsicType xmm1( y.load(i) );
1141  for( size_t j=0UL; j<N; ++j ) {
1142  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1143  }
1144  y.store( i, xmm1 );
1145  }
1146  }
1148  //**********************************************************************************************
1149 
1150  //**BLAS-based addition assignment to dense vectors (default)***********************************
1164  template< typename VT1 // Type of the left-hand side target vector
1165  , typename MT1 // Type of the left-hand side matrix operand
1166  , typename VT2 > // Type of the right-hand side vector operand
1167  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1168  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1169  {
1170  selectDefaultAddAssignKernel( y, A, x );
1171  }
1173  //**********************************************************************************************
1174 
1175  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1176 #if BLAZE_BLAS_MODE
1177 
1190  template< typename VT1 // Type of the left-hand side target vector
1191  , typename MT1 // Type of the left-hand side matrix operand
1192  , typename VT2 > // Type of the right-hand side vector operand
1193  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1194  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1195  {
1196  sgemv( y, A, x, 1.0F, 1.0F );
1197  }
1199 #endif
1200  //**********************************************************************************************
1201 
1202  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1203 #if BLAZE_BLAS_MODE
1204 
1217  template< typename VT1 // Type of the left-hand side target vector
1218  , typename MT1 // Type of the left-hand side matrix operand
1219  , typename VT2 > // Type of the right-hand side vector operand
1220  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1221  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1222  {
1223  dgemv( y, A, x, 1.0, 1.0 );
1224  }
1226 #endif
1227  //**********************************************************************************************
1228 
1229  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1230 #if BLAZE_BLAS_MODE
1231 
1244  template< typename VT1 // Type of the left-hand side target vector
1245  , typename MT1 // Type of the left-hand side matrix operand
1246  , typename VT2 > // Type of the right-hand side vector operand
1247  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1248  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1249  {
1250  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1251  }
1253 #endif
1254  //**********************************************************************************************
1255 
1256  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1257 #if BLAZE_BLAS_MODE
1258 
1271  template< typename VT1 // Type of the left-hand side target vector
1272  , typename MT1 // Type of the left-hand side matrix operand
1273  , typename VT2 > // Type of the right-hand side vector operand
1274  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1275  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1276  {
1277  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1278  }
1280 #endif
1281  //**********************************************************************************************
1282 
1283  //**Addition assignment to sparse vectors*******************************************************
1284  // No special implementation for the addition assignment to sparse vectors.
1285  //**********************************************************************************************
1286 
1287  //**Subtraction assignment to dense vectors*****************************************************
1300  template< typename VT1 > // Type of the target dense vector
1301  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1302  {
1304 
1305  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1306 
1307  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1308  return;
1309  }
1310 
1311  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1312  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1313 
1314  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1315  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1316  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1317  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1318 
1319  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1320  }
1322  //**********************************************************************************************
1323 
1324  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1335  template< typename VT1 // Type of the left-hand side target vector
1336  , typename MT1 // Type of the left-hand side matrix operand
1337  , typename VT2 > // Type of the right-hand side vector operand
1338  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1339  {
1340  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1341  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1342  TDMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1343  else
1344  TDMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
1345  }
1347  //**********************************************************************************************
1348 
1349  //**Default subtraction assignment to dense vectors*********************************************
1363  template< typename VT1 // Type of the left-hand side target vector
1364  , typename MT1 // Type of the left-hand side matrix operand
1365  , typename VT2 > // Type of the right-hand side vector operand
1366  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1367  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1368  {
1369  const size_t M( A.rows() );
1370  const size_t N( A.columns() );
1371 
1372  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1373  const size_t iend( M & size_t(-2) );
1374 
1375  for( size_t j=0UL; j<N; ++j ) {
1376  for( size_t i=0UL; i<iend; i+=2UL ) {
1377  y[i ] -= x[j] * A(i ,j);
1378  y[i+1UL] -= x[j] * A(i+1UL,j);
1379  }
1380  if( iend < M ) {
1381  y[iend] -= x[j] * A(iend,j);
1382  }
1383  }
1384  }
1386  //**********************************************************************************************
1387 
1388  //**Vectorized default subtraction assignment to dense vectors**********************************
1402  template< typename VT1 // Type of the left-hand side target vector
1403  , typename MT1 // Type of the left-hand side matrix operand
1404  , typename VT2 > // Type of the right-hand side vector operand
1405  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1406  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1407  {
1408  typedef IntrinsicTrait<ElementType> IT;
1409 
1410  const size_t M( A.rows() );
1411  const size_t N( A.columns() );
1412 
1413  size_t i( 0UL );
1414 
1415  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1416  IntrinsicType xmm1( y.load(i ) );
1417  IntrinsicType xmm2( y.load(i+IT::size ) );
1418  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1419  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1420  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1421  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1422  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1423  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1424  for( size_t j=0UL; j<N; ++j ) {
1425  const IntrinsicType x1( set( x[j] ) );
1426  xmm1 = xmm1 - A.load(i ,j) * x1;
1427  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1428  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1429  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1430  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1431  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1432  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1433  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1434  }
1435  y.store( i , xmm1 );
1436  y.store( i+IT::size , xmm2 );
1437  y.store( i+IT::size*2UL, xmm3 );
1438  y.store( i+IT::size*3UL, xmm4 );
1439  y.store( i+IT::size*4UL, xmm5 );
1440  y.store( i+IT::size*5UL, xmm6 );
1441  y.store( i+IT::size*6UL, xmm7 );
1442  y.store( i+IT::size*7UL, xmm8 );
1443  }
1444  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1445  IntrinsicType xmm1( y.load(i ) );
1446  IntrinsicType xmm2( y.load(i+IT::size ) );
1447  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1448  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1449  for( size_t j=0UL; j<N; ++j ) {
1450  const IntrinsicType x1( set( x[j] ) );
1451  xmm1 = xmm1 - A.load(i ,j) * x1;
1452  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1453  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1454  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1455  }
1456  y.store( i , xmm1 );
1457  y.store( i+IT::size , xmm2 );
1458  y.store( i+IT::size*2UL, xmm3 );
1459  y.store( i+IT::size*3UL, xmm4 );
1460  }
1461  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1462  IntrinsicType xmm1( y.load(i ) );
1463  IntrinsicType xmm2( y.load(i+IT::size ) );
1464  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1465  for( size_t j=0UL; j<N; ++j ) {
1466  const IntrinsicType x1( set( x[j] ) );
1467  xmm1 = xmm1 - A.load(i ,j) * x1;
1468  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1469  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1470  }
1471  y.store( i , xmm1 );
1472  y.store( i+IT::size , xmm2 );
1473  y.store( i+IT::size*2UL, xmm3 );
1474  }
1475  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1476  IntrinsicType xmm1( y.load(i ) );
1477  IntrinsicType xmm2( y.load(i+IT::size) );
1478  for( size_t j=0UL; j<N; ++j ) {
1479  const IntrinsicType x1( set( x[j] ) );
1480  xmm1 = xmm1 - A.load(i ,j) * x1;
1481  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
1482  }
1483  y.store( i , xmm1 );
1484  y.store( i+IT::size, xmm2 );
1485  }
1486  if( i < M ) {
1487  IntrinsicType xmm1( y.load(i) );
1488  for( size_t j=0UL; j<N; ++j ) {
1489  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
1490  }
1491  y.store( i, xmm1 );
1492  }
1493  }
1495  //**********************************************************************************************
1496 
1497  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1511  template< typename VT1 // Type of the left-hand side target vector
1512  , typename MT1 // Type of the left-hand side matrix operand
1513  , typename VT2 > // Type of the right-hand side vector operand
1514  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1515  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1516  {
1517  selectDefaultSubAssignKernel( y, A, x );
1518  }
1520  //**********************************************************************************************
1521 
1522  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1523 #if BLAZE_BLAS_MODE
1524 
1537  template< typename VT1 // Type of the left-hand side target vector
1538  , typename MT1 // Type of the left-hand side matrix operand
1539  , typename VT2 > // Type of the right-hand side vector operand
1540  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1541  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1542  {
1543  sgemv( y, A, x, -1.0F, 1.0F );
1544  }
1546 #endif
1547  //**********************************************************************************************
1548 
1549  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1550 #if BLAZE_BLAS_MODE
1551 
1564  template< typename VT1 // Type of the left-hand side target vector
1565  , typename MT1 // Type of the left-hand side matrix operand
1566  , typename VT2 > // Type of the right-hand side vector operand
1567  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1568  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1569  {
1570  dgemv( y, A, x, -1.0, 1.0 );
1571  }
1573 #endif
1574  //**********************************************************************************************
1575 
1576  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1577 #if BLAZE_BLAS_MODE
1578 
1591  template< typename VT1 // Type of the left-hand side target vector
1592  , typename MT1 // Type of the left-hand side matrix operand
1593  , typename VT2 > // Type of the right-hand side vector operand
1594  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1595  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1596  {
1597  cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1598  }
1600 #endif
1601  //**********************************************************************************************
1602 
1603  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1604 #if BLAZE_BLAS_MODE
1605 
1618  template< typename VT1 // Type of the left-hand side target vector
1619  , typename MT1 // Type of the left-hand side matrix operand
1620  , typename VT2 > // Type of the right-hand side vector operand
1621  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1622  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1623  {
1624  zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1625  }
1627 #endif
1628  //**********************************************************************************************
1629 
1630  //**Subtraction assignment to sparse vectors****************************************************
1631  // No special implementation for the subtraction assignment to sparse vectors.
1632  //**********************************************************************************************
1633 
1634  //**Multiplication assignment to dense vectors**************************************************
1647  template< typename VT1 > // Type of the target dense vector
1648  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1649  {
1651 
1655 
1656  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1657 
1658  const ResultType tmp( serial( rhs ) );
1659  multAssign( ~lhs, tmp );
1660  }
1662  //**********************************************************************************************
1663 
1664  //**Multiplication assignment to sparse vectors*************************************************
1665  // No special implementation for the multiplication assignment to sparse vectors.
1666  //**********************************************************************************************
1667 
1668  //**SMP assignment to dense vectors*************************************************************
1683  template< typename VT1 > // Type of the target dense vector
1684  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1685  smpAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1686  {
1688 
1689  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1690 
1691  if( rhs.mat_.rows() == 0UL ) {
1692  return;
1693  }
1694  else if( rhs.mat_.columns() == 0UL ) {
1695  reset( ~lhs );
1696  return;
1697  }
1698 
1699  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1700  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1701 
1702  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1703  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1704  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1705  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1706 
1707  smpAssign( ~lhs, A * x );
1708  }
1710  //**********************************************************************************************
1711 
1712  //**SMP assignment to sparse vectors************************************************************
1727  template< typename VT1 > // Type of the target sparse vector
1728  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1729  smpAssign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1730  {
1732 
1736 
1737  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1738 
1739  const ResultType tmp( rhs );
1740  smpAssign( ~lhs, tmp );
1741  }
1743  //**********************************************************************************************
1744 
1745  //**SMP addition assignment to dense vectors****************************************************
1760  template< typename VT1 > // Type of the target dense vector
1761  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1762  smpAddAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1763  {
1765 
1766  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1767 
1768  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1769  return;
1770  }
1771 
1772  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1773  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1774 
1775  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1776  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1777  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1778  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1779 
1780  smpAddAssign( ~lhs, A * x );
1781  }
1783  //**********************************************************************************************
1784 
1785  //**SMP addition assignment to sparse vectors***************************************************
1786  // No special implementation for the SMP addition assignment to sparse vectors.
1787  //**********************************************************************************************
1788 
1789  //**SMP subtraction assignment to dense vectors*************************************************
1804  template< typename VT1 > // Type of the target dense vector
1805  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1806  smpSubAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1807  {
1809 
1810  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1811 
1812  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1813  return;
1814  }
1815 
1816  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1817  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1818 
1819  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1820  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1821  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1822  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1823 
1824  smpSubAssign( ~lhs, A * x );
1825  }
1827  //**********************************************************************************************
1828 
1829  //**SMP subtraction assignment to sparse vectors************************************************
1830  // No special implementation for the SMP subtraction assignment to sparse vectors.
1831  //**********************************************************************************************
1832 
1833  //**SMP multiplication assignment to dense vectors**********************************************
1848  template< typename VT1 > // Type of the target dense vector
1849  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1850  smpMultAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1851  {
1853 
1857 
1858  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1859 
1860  const ResultType tmp( rhs );
1861  smpMultAssign( ~lhs, tmp );
1862  }
1864  //**********************************************************************************************
1865 
1866  //**SMP multiplication assignment to sparse vectors*********************************************
1867  // No special implementation for the SMP multiplication assignment to sparse vectors.
1868  //**********************************************************************************************
1869 
1870  //**Compile time checks*************************************************************************
1878  //**********************************************************************************************
1879 };
1880 //*************************************************************************************************
1881 
1882 
1883 
1884 
1885 //=================================================================================================
1886 //
1887 // DVECSCALARMULTEXPR SPECIALIZATION
1888 //
1889 //=================================================================================================
1890 
1891 //*************************************************************************************************
1900 template< typename MT // Type of the left-hand side dense matrix
1901  , typename VT // Type of the right-hand side dense vector
1902  , typename ST > // Type of the side scalar value
1903 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1904  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1905  , private VecScalarMultExpr
1906  , private Computation
1907 {
1908  private:
1909  //**Type definitions****************************************************************************
1910  typedef TDMatDVecMultExpr<MT,VT> MVM;
1911  typedef typename MVM::ResultType RES;
1912  typedef typename MT::ResultType MRT;
1913  typedef typename VT::ResultType VRT;
1914  typedef typename MRT::ElementType MET;
1915  typedef typename VRT::ElementType VET;
1916  typedef typename MT::CompositeType MCT;
1917  typedef typename VT::CompositeType VCT;
1918  //**********************************************************************************************
1919 
1920  //**********************************************************************************************
1922  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1923  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1924  //**********************************************************************************************
1925 
1926  //**********************************************************************************************
1928  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1929  //**********************************************************************************************
1930 
1931  //**********************************************************************************************
1933 
1936  template< typename T1 >
1937  struct UseSMPAssign {
1938  enum { value = ( evaluateMatrix || evaluateVector ) };
1939  };
1940  //**********************************************************************************************
1941 
1942  //**********************************************************************************************
1944 
1947  template< typename T1, typename T2, typename T3, typename T4 >
1948  struct UseSinglePrecisionKernel {
1949  enum { value = BLAZE_BLAS_MODE &&
1950  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1951  IsFloat<typename T1::ElementType>::value &&
1952  IsFloat<typename T2::ElementType>::value &&
1953  IsFloat<typename T3::ElementType>::value &&
1954  !IsComplex<T4>::value };
1955  };
1956  //**********************************************************************************************
1957 
1958  //**********************************************************************************************
1960 
1963  template< typename T1, typename T2, typename T3, typename T4 >
1964  struct UseDoublePrecisionKernel {
1965  enum { value = BLAZE_BLAS_MODE &&
1966  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1967  IsDouble<typename T1::ElementType>::value &&
1968  IsDouble<typename T2::ElementType>::value &&
1969  IsDouble<typename T3::ElementType>::value &&
1970  !IsComplex<T4>::value };
1971  };
1972  //**********************************************************************************************
1973 
1974  //**********************************************************************************************
1976 
1979  template< typename T1, typename T2, typename T3 >
1980  struct UseSinglePrecisionComplexKernel {
1981  typedef complex<float> Type;
1982  enum { value = BLAZE_BLAS_MODE &&
1983  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1984  IsSame<typename T1::ElementType,Type>::value &&
1985  IsSame<typename T2::ElementType,Type>::value &&
1986  IsSame<typename T3::ElementType,Type>::value };
1987  };
1988  //**********************************************************************************************
1989 
1990  //**********************************************************************************************
1992 
1995  template< typename T1, typename T2, typename T3 >
1996  struct UseDoublePrecisionComplexKernel {
1997  typedef complex<double> Type;
1998  enum { value = BLAZE_BLAS_MODE &&
1999  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2000  IsSame<typename T1::ElementType,Type>::value &&
2001  IsSame<typename T2::ElementType,Type>::value &&
2002  IsSame<typename T3::ElementType,Type>::value };
2003  };
2004  //**********************************************************************************************
2005 
2006  //**********************************************************************************************
2008 
2010  template< typename T1, typename T2, typename T3, typename T4 >
2011  struct UseDefaultKernel {
2012  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2013  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2014  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2015  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2016  };
2017  //**********************************************************************************************
2018 
2019  //**********************************************************************************************
2021 
2024  template< typename T1, typename T2, typename T3, typename T4 >
2025  struct UseVectorizedDefaultKernel {
2026  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2027  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2028  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2029  IsSame<typename T1::ElementType,T4>::value &&
2030  IntrinsicTrait<typename T1::ElementType>::addition &&
2031  IntrinsicTrait<typename T1::ElementType>::multiplication };
2032  };
2033  //**********************************************************************************************
2034 
2035  public:
2036  //**Type definitions****************************************************************************
2037  typedef DVecScalarMultExpr<MVM,ST,false> This;
2038  typedef typename MultTrait<RES,ST>::Type ResultType;
2039  typedef typename ResultType::TransposeType TransposeType;
2040  typedef typename ResultType::ElementType ElementType;
2041  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2042  typedef const ElementType ReturnType;
2043  typedef const ResultType CompositeType;
2044 
2046  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
2047 
2049  typedef ST RightOperand;
2050 
2052  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
2053 
2055  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
2056  //**********************************************************************************************
2057 
2058  //**Compilation flags***************************************************************************
2060  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
2061  IsSame<MET,VET>::value &&
2062  IsSame<MET,ST>::value &&
2063  IntrinsicTrait<MET>::addition &&
2064  IntrinsicTrait<MET>::multiplication };
2065 
2067  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2068  !evaluateVector && VT::smpAssignable };
2069  //**********************************************************************************************
2070 
2071  //**Constructor*********************************************************************************
2077  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2078  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2079  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2080  {}
2081  //**********************************************************************************************
2082 
2083  //**Subscript operator**************************************************************************
2089  inline ReturnType operator[]( size_t index ) const {
2090  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2091  return vector_[index] * scalar_;
2092  }
2093  //**********************************************************************************************
2094 
2095  //**Size function*******************************************************************************
2100  inline size_t size() const {
2101  return vector_.size();
2102  }
2103  //**********************************************************************************************
2104 
2105  //**Left operand access*************************************************************************
2110  inline LeftOperand leftOperand() const {
2111  return vector_;
2112  }
2113  //**********************************************************************************************
2114 
2115  //**Right operand access************************************************************************
2120  inline RightOperand rightOperand() const {
2121  return scalar_;
2122  }
2123  //**********************************************************************************************
2124 
2125  //**********************************************************************************************
2131  template< typename T >
2132  inline bool canAlias( const T* alias ) const {
2133  return vector_.canAlias( alias );
2134  }
2135  //**********************************************************************************************
2136 
2137  //**********************************************************************************************
2143  template< typename T >
2144  inline bool isAliased( const T* alias ) const {
2145  return vector_.isAliased( alias );
2146  }
2147  //**********************************************************************************************
2148 
2149  //**********************************************************************************************
2154  inline bool isAligned() const {
2155  return vector_.isAligned();
2156  }
2157  //**********************************************************************************************
2158 
2159  //**********************************************************************************************
2164  inline bool canSMPAssign() const {
2165  typename MVM::LeftOperand A( vector_.leftOperand() );
2166  return ( !BLAZE_BLAS_IS_PARALLEL ||
2167  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2168  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2170  }
2171  //**********************************************************************************************
2172 
2173  private:
2174  //**Member variables****************************************************************************
2175  LeftOperand vector_;
2176  RightOperand scalar_;
2177  //**********************************************************************************************
2178 
2179  //**BLAS kernel (single precision)**************************************************************
2180 #if BLAZE_BLAS_MODE
2181 
// Single precision BLAS kernel: forwards to cblas_sgemv with column-major storage and no
// transposition, which per the CBLAS gemv contract computes y = alpha*A*x + beta*y.
//   y      target vector, accessed through y.data() with unit stride
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data() with unit stride
//   alpha  scaling factor for A*x
//   beta   scaling factor for the previous content of y
// The dimensions are converted to int via boost::numeric_cast (a range-checked conversion).
// NOTE(review): listing lines 2202-2204 are not shown in this excerpt.
2195  template< typename VT1 // Type of the left-hand side target vector
2196  , typename MT1 // Type of the left-hand side matrix operand
2197  , typename VT2 > // Type of the right-hand side vector operand
2198  static inline void sgemv( VT1& y, const MT1& A, const VT2& x, float alpha, float beta )
2199  {
2200  using boost::numeric_cast;
2201 
2205 
2206  const int M ( numeric_cast<int>( A.rows() ) );
2207  const int N ( numeric_cast<int>( A.columns() ) );
2208  const int lda( numeric_cast<int>( A.spacing() ) );
2209 
2210  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
2211  A.data(), lda, x.data(), 1, beta, y.data(), 1 );
2212  }
2213 #endif
2214  //**********************************************************************************************
2215 
2216  //**BLAS kernel (double precision)**************************************************************
2217 #if BLAZE_BLAS_MODE
2218 
// Double precision BLAS kernel: forwards to cblas_dgemv with column-major storage and no
// transposition, which per the CBLAS gemv contract computes y = alpha*A*x + beta*y.
//   y      target vector, accessed through y.data() with unit stride
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data() with unit stride
//   alpha  scaling factor for A*x
//   beta   scaling factor for the previous content of y
// The dimensions are converted to int via boost::numeric_cast (a range-checked conversion).
// NOTE(review): listing lines 2239-2241 are not shown in this excerpt.
2232  template< typename VT1 // Type of the left-hand side target vector
2233  , typename MT1 // Type of the left-hand side matrix operand
2234  , typename VT2 > // Type of the right-hand side vector operand
2235  static inline void dgemv( VT1& y, const MT1& A, const VT2& x, double alpha, double beta )
2236  {
2237  using boost::numeric_cast;
2238 
2242 
2243  const int M ( numeric_cast<int>( A.rows() ) );
2244  const int N ( numeric_cast<int>( A.columns() ) );
2245  const int lda( numeric_cast<int>( A.spacing() ) );
2246 
2247  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
2248  A.data(), lda, x.data(), 1, beta, y.data(), 1 );
2249  }
2250 #endif
2251  //**********************************************************************************************
2252 
2253  //**BLAS kernel (single precision complex)******************************************************
2254 #if BLAZE_BLAS_MODE
2255 
// Single precision complex BLAS kernel: forwards to cblas_cgemv with column-major storage and
// no transposition, which per the CBLAS gemv contract computes y = alpha*A*x + beta*y.
//   y      target vector, accessed through y.data() with unit stride
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data() with unit stride
//   alpha  complex scaling factor for A*x (passed to CBLAS by address)
//   beta   complex scaling factor for the previous content of y (passed by address)
// The constraints below require the value_type of all three element types to be float.
// NOTE(review): listing lines 2277-2279 are not shown in this excerpt.
2269  template< typename VT1 // Type of the left-hand side target vector
2270  , typename MT1 // Type of the left-hand side matrix operand
2271  , typename VT2 > // Type of the right-hand side vector operand
2272  static inline void cgemv( VT1& y, const MT1& A, const VT2& x,
2273  complex<float> alpha, complex<float> beta )
2274  {
2275  using boost::numeric_cast;
2276 
2280  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2281  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2282  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2283 
2284  const int M ( numeric_cast<int>( A.rows() ) );
2285  const int N ( numeric_cast<int>( A.columns() ) );
2286  const int lda( numeric_cast<int>( A.spacing() ) );
2287 
2288  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2289  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2290  }
2291 #endif
2292  //**********************************************************************************************
2293 
2294  //**BLAS kernel (double precision complex)******************************************************
2295 #if BLAZE_BLAS_MODE
2296 
// Double precision complex BLAS kernel: forwards to cblas_zgemv with column-major storage and
// no transposition, which per the CBLAS gemv contract computes y = alpha*A*x + beta*y.
//   y      target vector, accessed through y.data() with unit stride
//   A      matrix operand; A.spacing() is passed as the leading dimension
//   x      vector operand, accessed through x.data() with unit stride
//   alpha  complex scaling factor for A*x (passed to CBLAS by address)
//   beta   complex scaling factor for the previous content of y (passed by address)
// The constraints below require the value_type of all three element types to be double.
// NOTE(review): listing lines 2318-2320 are not shown in this excerpt.
2310  template< typename VT1 // Type of the left-hand side target vector
2311  , typename MT1 // Type of the left-hand side matrix operand
2312  , typename VT2 > // Type of the right-hand side vector operand
2313  static inline void zgemv( VT1& y, const MT1& A, const VT2& x,
2314  complex<double> alpha, complex<double> beta )
2315  {
2316  using boost::numeric_cast;
2317 
2321  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2322  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2323  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2324 
2325  const int M ( numeric_cast<int>( A.rows() ) );
2326  const int N ( numeric_cast<int>( A.columns() ) );
2327  const int lda( numeric_cast<int>( A.spacing() ) );
2328 
2329  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2330  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2331  }
2332 #endif
2333  //**********************************************************************************************
2334 
2335  //**Assignment to dense vectors*****************************************************************
// Assignment of the scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled multiplication expression to be assigned
// Extracts the matrix and vector operands of the wrapped product, handles the empty cases,
// evaluates both operands serially and hands them together with the scalar to the kernel
// selection.
// NOTE(review): listing line 2350 is not shown in this excerpt.
2347  template< typename VT1 > // Type of the target dense vector
2348  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2349  {
2351 
2352  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2353 
2354  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2355  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2356 
      // A matrix without rows leaves nothing to write; a matrix without columns yields an
      // empty sum per element, so the target is reset instead of running a kernel.
2357  if( left.rows() == 0UL ) {
2358  return;
2359  }
2360  else if( left.columns() == 0UL ) {
2361  reset( ~lhs );
2362  return;
2363  }
2364 
2365  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2366  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2367 
2368  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2369  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2370  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2371  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2372 
2373  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2374  }
2375  //**********************************************************************************************
2376 
2377  //**Assignment to dense vectors (kernel selection)**********************************************
2388  template< typename VT1 // Type of the left-hand side target vector
2389  , typename MT1 // Type of the left-hand side matrix operand
2390  , typename VT2 // Type of the right-hand side vector operand
2391  , typename ST2 > // Type of the scalar value
2392  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2393  {
2394  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2395  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2396  DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2397  else
2398  DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
2399  }
2400  //**********************************************************************************************
2401 
2402  //**Default assignment to dense vectors*********************************************************
2416  template< typename VT1 // Type of the left-hand side target vector
2417  , typename MT1 // Type of the left-hand side matrix operand
2418  , typename VT2 // Type of the right-hand side vector operand
2419  , typename ST2 > // Type of the scalar value
2420  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2421  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2422  {
2423  const size_t M( A.rows() );
2424  const size_t N( A.columns() );
2425 
2426  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
2427  const size_t iend( M & size_t(-2) );
2428 
2429  for( size_t i=0UL; i<M; ++i ) {
2430  y[i] = x[0UL] * A(i,0UL);
2431  }
2432  for( size_t j=1UL; j<N; ++j ) {
2433  for( size_t i=0UL; i<iend; i+=2UL ) {
2434  y[i ] += x[j] * A(i ,j);
2435  y[i+1UL] += x[j] * A(i+1UL,j);
2436  }
2437  if( iend < M ) {
2438  y[iend] += x[j] * A(iend,j);
2439  }
2440  }
2441  for( size_t i=0UL; i<M; ++i ) {
2442  y[i] *= scalar;
2443  }
2444  }
2445  //**********************************************************************************************
2446 
2447  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default assignment kernel for the scaled matrix/vector product.
//   y       target vector, written via y.store()
//   A       matrix operand, read via A.load(i,j)
//   x       vector operand; each x[j] is broadcast with set()
//   scalar  scaling factor; broadcast once into 'factor'
// Rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vectors of IT::size
// elements. For every block the partial sums over all N columns are kept in xmm accumulators
// and multiplied by 'factor' right before they are stored.
// NOTE(review): the accumulators are only default-constructed; this presumably relies on
// IntrinsicType zero-initializing itself -- confirm.
// NOTE(review): the trailing 'if( i < M )' block loads and stores a full intrinsic vector even
// if fewer than IT::size rows remain; presumably the operands are padded -- confirm.
2461  template< typename VT1 // Type of the left-hand side target vector
2462  , typename MT1 // Type of the left-hand side matrix operand
2463  , typename VT2 // Type of the right-hand side vector operand
2464  , typename ST2 > // Type of the scalar value
2465  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2466  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2467  {
2468  typedef IntrinsicTrait<ElementType> IT;
2469 
2470  const size_t M( A.rows() );
2471  const size_t N( A.columns() );
2472 
2473  const IntrinsicType factor( set( scalar ) );
2474 
2475  size_t i( 0UL );
2476 
      // Blocks of 8 intrinsic vectors
2477  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2478  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2479  for( size_t j=0UL; j<N; ++j ) {
2480  const IntrinsicType x1( set( x[j] ) );
2481  xmm1 = xmm1 + A.load(i ,j) * x1;
2482  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2483  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2484  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2485  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2486  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2487  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2488  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2489  }
2490  y.store( i , xmm1*factor );
2491  y.store( i+IT::size , xmm2*factor );
2492  y.store( i+IT::size*2UL, xmm3*factor );
2493  y.store( i+IT::size*3UL, xmm4*factor );
2494  y.store( i+IT::size*4UL, xmm5*factor );
2495  y.store( i+IT::size*5UL, xmm6*factor );
2496  y.store( i+IT::size*6UL, xmm7*factor );
2497  y.store( i+IT::size*7UL, xmm8*factor );
2498  }
      // Blocks of 4 intrinsic vectors
2499  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2500  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2501  for( size_t j=0UL; j<N; ++j ) {
2502  const IntrinsicType x1( set( x[j] ) );
2503  xmm1 = xmm1 + A.load(i ,j) * x1;
2504  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2505  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2506  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2507  }
2508  y.store( i , xmm1*factor );
2509  y.store( i+IT::size , xmm2*factor );
2510  y.store( i+IT::size*2UL, xmm3*factor );
2511  y.store( i+IT::size*3UL, xmm4*factor );
2512  }
      // Blocks of 3 intrinsic vectors
2513  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2514  IntrinsicType xmm1, xmm2, xmm3;
2515  for( size_t j=0UL; j<N; ++j ) {
2516  const IntrinsicType x1( set( x[j] ) );
2517  xmm1 = xmm1 + A.load(i ,j) * x1;
2518  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2519  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2520  }
2521  y.store( i , xmm1*factor );
2522  y.store( i+IT::size , xmm2*factor );
2523  y.store( i+IT::size*2UL, xmm3*factor );
2524  }
      // Blocks of 2 intrinsic vectors
2525  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2526  IntrinsicType xmm1, xmm2;
2527  for( size_t j=0UL; j<N; ++j ) {
2528  const IntrinsicType x1( set( x[j] ) );
2529  xmm1 = xmm1 + A.load(i ,j) * x1;
2530  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2531  }
2532  y.store( i , xmm1*factor );
2533  y.store( i+IT::size, xmm2*factor );
2534  }
      // Last (possibly partial) intrinsic vector
2535  if( i < M ) {
2536  IntrinsicType xmm1;
2537  for( size_t j=0UL; j<N; ++j ) {
2538  const IntrinsicType x1( set( x[j] ) );
2539  xmm1 = xmm1 + A.load(i,j) * x1;
2540  }
2541  y.store( i, xmm1*factor );
2542  }
2543  }
2544  //**********************************************************************************************
2545 
2546  //**BLAS-based assignment to dense vectors (default)********************************************
2560  template< typename VT1 // Type of the left-hand side target vector
2561  , typename MT1 // Type of the left-hand side matrix operand
2562  , typename VT2 // Type of the right-hand side vector operand
2563  , typename ST2 > // Type of the scalar value
2564  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2565  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2566  {
2567  selectDefaultAssignKernel( y, A, x, scalar );
2568  }
2569  //**********************************************************************************************
2570 
2571  //**BLAS-based assignment to dense vectors (single precision)***********************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based assignment of the scaled matrix/vector product (single precision).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the sgemv() kernel with \a scalar as alpha and a beta of zero.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
   selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const float alpha = scalar;
   const float beta  = 0.0F;

   sgemv( y, A, x, alpha, beta );
}
#endif
2596  //**********************************************************************************************
2597 
2598  //**BLAS-based assignment to dense vectors (double precision)***********************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based assignment of the scaled matrix/vector product (double precision).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the dgemv() kernel with \a scalar as alpha and a beta of zero.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
   selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const double alpha = scalar;
   const double beta  = 0.0;

   dgemv( y, A, x, alpha, beta );
}
#endif
2623  //**********************************************************************************************
2624 
2625  //**BLAS-based assignment to dense vectors (single precision complex)***************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based assignment of the scaled matrix/vector product (single precision complex).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the cgemv() kernel with alpha = (scalar,0) and beta = (0,0).
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const complex<float> alpha( scalar, 0.0F );
   const complex<float> beta ( 0.0F, 0.0F );

   cgemv( y, A, x, alpha, beta );
}
#endif
2650  //**********************************************************************************************
2651 
2652  //**BLAS-based assignment to dense vectors (double precision complex)***************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based assignment of the scaled matrix/vector product (double precision complex).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the zgemv() kernel with alpha = (scalar,0) and beta = (0,0).
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const complex<double> alpha( scalar, 0.0 );
   const complex<double> beta ( 0.0, 0.0 );

   zgemv( y, A, x, alpha, beta );
}
#endif
2677  //**********************************************************************************************
2678 
2679  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector product to a sparse vector.
//   lhs  target sparse vector
//   rhs  scaled multiplication expression to be assigned
// The expression is first evaluated serially into a temporary of type ResultType, which is
// then assigned to the target.
// NOTE(review): listing lines 2694 and 2696-2698 are not shown in this excerpt.
2691  template< typename VT1 > // Type of the target sparse vector
2692  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2693  {
2695 
2699 
2700  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2701 
2702  const ResultType tmp( serial( rhs ) );
2703  assign( ~lhs, tmp );
2704  }
2705  //**********************************************************************************************
2706 
2707  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled multiplication expression to be added
// Extracts the matrix and vector operands of the wrapped product, evaluates both serially and
// hands them together with the scalar to the addition-assignment kernel selection.
// NOTE(review): listing line 2722 is not shown in this excerpt.
2719  template< typename VT1 > // Type of the target dense vector
2720  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2721  {
2723 
2724  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2725 
2726  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2727  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2728 
      // An empty matrix contributes nothing, so the target is left untouched.
2729  if( left.rows() == 0UL || left.columns() == 0UL ) {
2730  return;
2731  }
2732 
2733  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2734  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2735 
2736  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2737  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2738  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2739  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2740 
2741  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2742  }
2743  //**********************************************************************************************
2744 
2745  //**Addition assignment to dense vectors (kernel selection)*************************************
2756  template< typename VT1 // Type of the left-hand side target vector
2757  , typename MT1 // Type of the left-hand side matrix operand
2758  , typename VT2 // Type of the right-hand side vector operand
2759  , typename ST2 > // Type of the scalar value
2760  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2761  {
2762  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2763  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2764  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2765  else
2766  DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
2767  }
2768  //**********************************************************************************************
2769 
2770  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel for the scaled matrix/vector product.
//   y       target vector
//   A       matrix operand
//   x       vector operand
//   scalar  scaling factor
// Rebuilds the expression A * x * scalar from the already evaluated operands and passes it to
// the target's addAssign() member.
2784  template< typename VT1 // Type of the left-hand side target vector
2785  , typename MT1 // Type of the left-hand side matrix operand
2786  , typename VT2 // Type of the right-hand side vector operand
2787  , typename ST2 > // Type of the scalar value
2788  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2789  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2790  {
2791  y.addAssign( A * x * scalar );
2792  }
2793  //**********************************************************************************************
2794 
2795  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel for the scaled matrix/vector product.
//   y       target vector, read via y.load() and written via y.store()
//   A       matrix operand, read via A.load(i,j)
//   x       vector operand; each x[j] is broadcast with set()
//   scalar  scaling factor; broadcast once into 'factor'
// Rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vectors of IT::size
// elements. For every block the partial sums over all N columns are kept in xmm accumulators,
// multiplied by 'factor' and added to the current content of y.
// NOTE(review): the accumulators are only default-constructed; this presumably relies on
// IntrinsicType zero-initializing itself -- confirm.
// NOTE(review): the trailing 'if( i < M )' block loads and stores a full intrinsic vector even
// if fewer than IT::size rows remain; presumably the operands are padded -- confirm.
2809  template< typename VT1 // Type of the left-hand side target vector
2810  , typename MT1 // Type of the left-hand side matrix operand
2811  , typename VT2 // Type of the right-hand side vector operand
2812  , typename ST2 > // Type of the scalar value
2813  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2814  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2815  {
2816  typedef IntrinsicTrait<ElementType> IT;
2817 
2818  const size_t M( A.rows() );
2819  const size_t N( A.columns() );
2820 
2821  const IntrinsicType factor( set( scalar ) );
2822 
2823  size_t i( 0UL );
2824 
      // Blocks of 8 intrinsic vectors
2825  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2826  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2827  for( size_t j=0UL; j<N; ++j ) {
2828  const IntrinsicType x1( set( x[j] ) );
2829  xmm1 = xmm1 + A.load(i ,j) * x1;
2830  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2831  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2832  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2833  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2834  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2835  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2836  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2837  }
2838  y.store( i , y.load(i ) + xmm1*factor );
2839  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2840  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2841  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2842  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
2843  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
2844  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
2845  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
2846  }
      // Blocks of 4 intrinsic vectors
2847  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2848  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2849  for( size_t j=0UL; j<N; ++j ) {
2850  const IntrinsicType x1( set( x[j] ) );
2851  xmm1 = xmm1 + A.load(i ,j) * x1;
2852  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2853  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2854  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2855  }
2856  y.store( i , y.load(i ) + xmm1*factor );
2857  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2858  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2859  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2860  }
      // Blocks of 3 intrinsic vectors
2861  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2862  IntrinsicType xmm1, xmm2, xmm3;
2863  for( size_t j=0UL; j<N; ++j ) {
2864  const IntrinsicType x1( set( x[j] ) );
2865  xmm1 = xmm1 + A.load(i ,j) * x1;
2866  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2867  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2868  }
2869  y.store( i , y.load(i ) + xmm1*factor );
2870  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2871  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2872  }
      // Blocks of 2 intrinsic vectors
2873  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2874  IntrinsicType xmm1, xmm2;
2875  for( size_t j=0UL; j<N; ++j ) {
2876  const IntrinsicType x1( set( x[j] ) );
2877  xmm1 = xmm1 + A.load(i ,j) * x1;
2878  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2879  }
2880  y.store( i , y.load(i ) + xmm1*factor );
2881  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
2882  }
      // Last (possibly partial) intrinsic vector
2883  if( i < M ) {
2884  IntrinsicType xmm1;
2885  for( size_t j=0UL; j<N; ++j ) {
2886  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2887  }
2888  y.store( i, y.load(i) + xmm1*factor );
2889  }
2890  }
2891  //**********************************************************************************************
2892 
2893  //**BLAS-based addition assignment to dense vectors (default)***********************************
2907  template< typename VT1 // Type of the left-hand side target vector
2908  , typename MT1 // Type of the left-hand side matrix operand
2909  , typename VT2 // Type of the right-hand side vector operand
2910  , typename ST2 > // Type of the scalar value
2911  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2912  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2913  {
2914  selectDefaultAddAssignKernel( y, A, x, scalar );
2915  }
2916  //**********************************************************************************************
2917 
2918  //**BLAS-based addition assignment to dense vectors (single precision)**************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of the scaled matrix/vector product (single precision).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the sgemv() kernel with \a scalar as alpha and a beta of one, so the previous content
// of \a y is kept and the scaled product is added to it.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
   selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const float alpha = scalar;
   const float beta  = 1.0F;

   sgemv( y, A, x, alpha, beta );
}
#endif
2943  //**********************************************************************************************
2944 
2945  //**BLAS-based addition assignment to dense vectors (double precision)**************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of the scaled matrix/vector product (double precision).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the dgemv() kernel with \a scalar as alpha and a beta of one, so the previous content
// of \a y is kept and the scaled product is added to it.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
   selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const double alpha = scalar;
   const double beta  = 1.0;

   dgemv( y, A, x, alpha, beta );
}
#endif
2970  //**********************************************************************************************
2971 
2972  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of the scaled matrix/vector product (single precision complex).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the cgemv() kernel with alpha = (scalar,0) and beta = (1,0).
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const complex<float> alpha( scalar, 0.0F );
   const complex<float> beta ( 1.0F, 0.0F );

   cgemv( y, A, x, alpha, beta );
}
#endif
2997  //**********************************************************************************************
2998 
2999  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of the scaled matrix/vector product (double precision complex).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side matrix operand.
// \param x The right-hand side vector operand.
// \param scalar The scaling factor.
// \return void
//
// Calls the zgemv() kernel with alpha = (scalar,0) and beta = (1,0).
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2    // Type of the right-hand side vector operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
   selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   const complex<double> alpha( scalar, 0.0 );
   const complex<double> beta ( 1.0, 0.0 );

   zgemv( y, A, x, alpha, beta );
}
#endif
3024  //**********************************************************************************************
3025 
3026  //**Addition assignment to sparse vectors*******************************************************
3027  // No special implementation for the addition assignment to sparse vectors.
3028  //**********************************************************************************************
3029 
3030  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled multiplication expression to be subtracted
// Extracts the matrix and vector operands of the wrapped product, evaluates both serially and
// hands them together with the scalar to the subtraction-assignment kernel selection.
// NOTE(review): listing line 3045 is not shown in this excerpt.
3042  template< typename VT1 > // Type of the target dense vector
3043  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3044  {
3046 
3047  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3048 
3049  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3050  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3051 
      // An empty matrix contributes nothing, so the target is left untouched.
3052  if( left.rows() == 0UL || left.columns() == 0UL ) {
3053  return;
3054  }
3055 
3056  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3057  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3058 
3059  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3060  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3061  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3062  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3063 
3064  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3065  }
3066  //**********************************************************************************************
3067 
3068  //**Subtraction assignment to dense vectors (kernel selection)**********************************
3079  template< typename VT1 // Type of the left-hand side target vector
3080  , typename MT1 // Type of the left-hand side matrix operand
3081  , typename VT2 // Type of the right-hand side vector operand
3082  , typename ST2 > // Type of the scalar value
3083  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3084  {
3085  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3086  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3087  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
3088  else
3089  DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
3090  }
3091  //**********************************************************************************************
3092 
3093  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel for the scaled matrix/vector product.
//   y       target vector
//   A       matrix operand
//   x       vector operand
//   scalar  scaling factor
// Rebuilds the expression A * x * scalar from the already evaluated operands and passes it to
// the target's subAssign() member.
3107  template< typename VT1 // Type of the left-hand side target vector
3108  , typename MT1 // Type of the left-hand side matrix operand
3109  , typename VT2 // Type of the right-hand side vector operand
3110  , typename ST2 > // Type of the scalar value
3111  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3112  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3113  {
3114  y.subAssign( A * x * scalar );
3115  }
3116  //**********************************************************************************************
3117 
3118  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel for the scaled matrix/vector product.
//   y       target vector, read via y.load() and written via y.store()
//   A       matrix operand, read via A.load(i,j)
//   x       vector operand; each x[j] is broadcast with set()
//   scalar  scaling factor; broadcast once into 'factor'
// Rows are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vectors of IT::size
// elements. For every block the partial sums over all N columns are kept in xmm accumulators,
// multiplied by 'factor' and subtracted from the current content of y.
// NOTE(review): the accumulators are only default-constructed; this presumably relies on
// IntrinsicType zero-initializing itself -- confirm.
// NOTE(review): the trailing 'if( i < M )' block loads and stores a full intrinsic vector even
// if fewer than IT::size rows remain; presumably the operands are padded -- confirm.
3132  template< typename VT1 // Type of the left-hand side target vector
3133  , typename MT1 // Type of the left-hand side matrix operand
3134  , typename VT2 // Type of the right-hand side vector operand
3135  , typename ST2 > // Type of the scalar value
3136  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3137  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3138  {
3139  typedef IntrinsicTrait<ElementType> IT;
3140 
3141  const size_t M( A.rows() );
3142  const size_t N( A.columns() );
3143 
3144  const IntrinsicType factor( set( scalar ) );
3145 
3146  size_t i( 0UL );
3147 
      // Blocks of 8 intrinsic vectors
3148  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3149  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3150  for( size_t j=0UL; j<N; ++j ) {
3151  const IntrinsicType x1( set( x[j] ) );
3152  xmm1 = xmm1 + A.load(i ,j) * x1;
3153  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3154  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3155  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3156  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3157  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3158  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3159  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3160  }
3161  y.store( i , y.load(i ) - xmm1*factor );
3162  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3163  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3164  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3165  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
3166  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
3167  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
3168  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
3169  }
      // Blocks of 4 intrinsic vectors
3170  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3171  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3172  for( size_t j=0UL; j<N; ++j ) {
3173  const IntrinsicType x1( set( x[j] ) );
3174  xmm1 = xmm1 + A.load(i ,j) * x1;
3175  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3176  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3177  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3178  }
3179  y.store( i , y.load(i ) - xmm1*factor );
3180  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3181  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3182  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3183  }
      // Blocks of 3 intrinsic vectors
3184  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
3185  IntrinsicType xmm1, xmm2, xmm3;
3186  for( size_t j=0UL; j<N; ++j ) {
3187  const IntrinsicType x1( set( x[j] ) );
3188  xmm1 = xmm1 + A.load(i ,j) * x1;
3189  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3190  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3191  }
3192  y.store( i , y.load(i ) - xmm1*factor );
3193  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3194  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3195  }
      // Blocks of 2 intrinsic vectors
3196  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3197  IntrinsicType xmm1, xmm2;
3198  for( size_t j=0UL; j<N; ++j ) {
3199  const IntrinsicType x1( set( x[j] ) );
3200  xmm1 = xmm1 + A.load(i ,j) * x1;
3201  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3202  }
3203  y.store( i , y.load(i ) - xmm1*factor );
3204  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
3205  }
      // Last (possibly partial) intrinsic vector
3206  if( i < M ) {
3207  IntrinsicType xmm1;
3208  for( size_t j=0UL; j<N; ++j ) {
3209  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3210  }
3211  y.store( i, y.load(i) - xmm1*factor );
3212  }
3213  }
3214  //**********************************************************************************************
3215 
3216  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Fallback overload of selectBlasSubAssignKernel(). It is enabled through
      // EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> > and forwards all four arguments unchanged
      // to selectDefaultSubAssignKernel() instead of calling one of the gemv wrappers.
      //
      // y      : left-hand side target vector
      // A      : left-hand side matrix operand
      // x      : right-hand side vector operand
      // scalar : scalar value, passed through as-is
3230  template< typename VT1 // Type of the left-hand side target vector
3231  , typename MT1 // Type of the left-hand side matrix operand
3232  , typename VT2 // Type of the right-hand side vector operand
3233  , typename ST2 > // Type of the scalar value
3234  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3235  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3236  {
3237  selectDefaultSubAssignKernel( y, A, x, scalar );
3238  }
3239  //**********************************************************************************************
3240 
3241  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
3242 #if BLAZE_BLAS_MODE
3243 
      // Overload of selectBlasSubAssignKernel() that is only compiled when BLAZE_BLAS_MODE is
      // non-zero and only enabled through EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >.
      // It delegates to sgemv() with the negated scalar and the constant 1.0F.
      //
      // y      : left-hand side target vector
      // A      : left-hand side matrix operand
      // x      : right-hand side vector operand
      // scalar : scalar value; passed to sgemv() with inverted sign
      //
      // NOTE(review): presumably the last two sgemv() arguments are the gemv alpha/beta factors,
      // i.e. y = 1*y + (-scalar)*A*x. The sgemv() wrapper is not part of this section - confirm.
3256  template< typename VT1 // Type of the left-hand side target vector
3257  , typename MT1 // Type of the left-hand side matrix operand
3258  , typename VT2 // Type of the right-hand side vector operand
3259  , typename ST2 > // Type of the scalar value
3260  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3261  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3262  {
3263  sgemv( y, A, x, -scalar, 1.0F );
3264  }
3265 #endif
3266  //**********************************************************************************************
3267 
3268  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
3269 #if BLAZE_BLAS_MODE
3270 
      // Overload of selectBlasSubAssignKernel() that is only compiled when BLAZE_BLAS_MODE is
      // non-zero and only enabled through EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >.
      // It delegates to dgemv() with the negated scalar and the constant 1.0.
      //
      // y      : left-hand side target vector
      // A      : left-hand side matrix operand
      // x      : right-hand side vector operand
      // scalar : scalar value; passed to dgemv() with inverted sign
      //
      // NOTE(review): presumably the last two dgemv() arguments are the gemv alpha/beta factors,
      // i.e. y = 1*y + (-scalar)*A*x. The dgemv() wrapper is not part of this section - confirm.
3283  template< typename VT1 // Type of the left-hand side target vector
3284  , typename MT1 // Type of the left-hand side matrix operand
3285  , typename VT2 // Type of the right-hand side vector operand
3286  , typename ST2 > // Type of the scalar value
3287  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3288  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3289  {
3290  dgemv( y, A, x, -scalar, 1.0 );
3291  }
3292 #endif
3293  //**********************************************************************************************
3294 
3295  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
3296 #if BLAZE_BLAS_MODE
3297 
      // Overload of selectBlasSubAssignKernel() that is only compiled when BLAZE_BLAS_MODE is
      // non-zero and only enabled through EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >.
      // It delegates to cgemv(), passing complex<float>( -scalar, 0.0F ) and
      // complex<float>( 1.0F, 0.0F ) as the two trailing factors.
      //
      // y      : left-hand side target vector
      // A      : left-hand side matrix operand
      // x      : right-hand side vector operand
      // scalar : scalar value; its negation becomes the real part of the first complex factor
      //
      // NOTE(review): the enable condition does not mention ST2, while the two-argument
      // complex<float>( -scalar, 0.0F ) construction needs -scalar to be usable as a real part.
      // Verify that a complex-valued scalar can never reach this overload.
      // NOTE(review): presumably the two complex factors are the gemv alpha/beta values
      // (y = 1*y + (-scalar)*A*x); the cgemv() wrapper is not part of this section - confirm.
3310  template< typename VT1 // Type of the left-hand side target vector
3311  , typename MT1 // Type of the left-hand side matrix operand
3312  , typename VT2 // Type of the right-hand side vector operand
3313  , typename ST2 > // Type of the scalar value
3314  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3315  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3316  {
3317  cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3318  }
3319 #endif
3320  //**********************************************************************************************
3321 
3322  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
3323 #if BLAZE_BLAS_MODE
3324 
      // Overload of selectBlasSubAssignKernel() that is only compiled when BLAZE_BLAS_MODE is
      // non-zero and only enabled through EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >.
      // It delegates to zgemv(), passing complex<double>( -scalar, 0.0 ) and
      // complex<double>( 1.0, 0.0 ) as the two trailing factors.
      //
      // y      : left-hand side target vector
      // A      : left-hand side matrix operand
      // x      : right-hand side vector operand
      // scalar : scalar value; its negation becomes the real part of the first complex factor
      //
      // NOTE(review): the enable condition does not mention ST2, while the two-argument
      // complex<double>( -scalar, 0.0 ) construction needs -scalar to be usable as a real part.
      // Verify that a complex-valued scalar can never reach this overload.
      // NOTE(review): presumably the two complex factors are the gemv alpha/beta values
      // (y = 1*y + (-scalar)*A*x); the zgemv() wrapper is not part of this section - confirm.
3337  template< typename VT1 // Type of the left-hand side target vector
3338  , typename MT1 // Type of the left-hand side matrix operand
3339  , typename VT2 // Type of the right-hand side vector operand
3340  , typename ST2 > // Type of the scalar value
3341  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3342  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3343  {
3344  zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3345  }
3346 #endif
3347  //**********************************************************************************************
3348 
3349  //**Subtraction assignment to sparse vectors****************************************************
3350  // No special implementation for the subtraction assignment to sparse vectors.
3351  //**********************************************************************************************
3352 
3353  //**Multiplication assignment to dense vectors**************************************************
      // Multiplication assignment of the scaled expression rhs to the dense vector lhs.
      // The expression is not multiplied in directly: it is first evaluated into a temporary
      // of type ResultType, and that temporary is then passed on to multAssign().
      //
      // lhs : target dense vector; asserted to have the same size as rhs
      // rhs : the expression to be multiplied into the target
      //
      // NOTE(review): source lines 3368 and 3370-3372 are absent from this listing, so any
      // statements on them are not shown here.
3365  template< typename VT1 > // Type of the target dense vector
3366  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3367  {
3369 
3373 
3374  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3375 
      // serial( rhs ) wraps the expression before it is evaluated into the temporary.
3376  const ResultType tmp( serial( rhs ) );
3377  multAssign( ~lhs, tmp );
3378  }
3379  //**********************************************************************************************
3380 
3381  //**Multiplication assignment to sparse vectors*************************************************
3382  // No special implementation for the multiplication assignment to sparse vectors.
3383  //**********************************************************************************************
3384 
3385  //**SMP assignment to dense vectors**************************************************************
      // SMP assignment of the scaled matrix/vector product rhs to the dense vector lhs.
      // The overload is only enabled through EnableIf< UseSMPAssign<VT1> >.
      //
      // lhs : target dense vector; asserted to have the same size as rhs
      // rhs : scaled product expression; its operands are read through rhs.vector_ and its
      //       scaling factor through rhs.scalar_
      //
      // NOTE(review): source line 3403 is absent from this listing.
3399  template< typename VT1 > // Type of the target dense vector
3400  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3401  smpAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3402  {
3404 
3405  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3406 
3407  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3408  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3409 
      // Degenerate shapes: with zero rows the function returns without touching the target;
      // with zero columns the target is reset before returning.
3410  if( left.rows() == 0UL ) {
3411  return;
3412  }
3413  else if( left.columns() == 0UL ) {
3414  reset( ~lhs );
3415  return;
3416  }
3417 
3418  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3419  RT x( right ); // Evaluation of the right-hand side dense vector operand
3420 
      // Sanity checks: the evaluated operands must keep the dimensions of the originals and
      // the number of matrix rows must match the target size.
3421  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3422  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3423  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3424  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3425 
      // Re-form the scaled product from the evaluated operands and hand it to smpAssign().
3426  smpAssign( ~lhs, A * x * rhs.scalar_ );
3427  }
3428  //**********************************************************************************************
3429 
3430  //**SMP assignment to sparse vectors************************************************************
      // SMP assignment of the scaled expression rhs to the sparse vector lhs.
      // The overload is only enabled through EnableIf< UseSMPAssign<VT1> >. The expression is
      // evaluated into a temporary of type ResultType, which is then passed to smpAssign().
      //
      // lhs : target sparse vector; asserted to have the same size as rhs
      // rhs : the expression to be assigned
      //
      // NOTE(review): source lines 3448 and 3450-3452 are absent from this listing, so any
      // statements on them are not shown here.
3444  template< typename VT1 > // Type of the target sparse vector
3445  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3446  smpAssign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3447  {
3449 
3453 
3454  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3455 
3456  const ResultType tmp( rhs );
3457  smpAssign( ~lhs, tmp );
3458  }
3459  //**********************************************************************************************
3460 
3461  //**SMP addition assignment to dense vectors****************************************************
      // SMP addition assignment of the scaled matrix/vector product rhs to the dense vector lhs.
      // The overload is only enabled through EnableIf< UseSMPAssign<VT1> >.
      //
      // lhs : target dense vector; asserted to have the same size as rhs
      // rhs : scaled product expression; its operands are read through rhs.vector_ and its
      //       scaling factor through rhs.scalar_
      //
      // NOTE(review): source line 3479 is absent from this listing.
3475  template< typename VT1 > // Type of the target dense vector
3476  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3477  smpAddAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3478  {
3480 
3481  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3482 
3483  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3484  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3485 
      // A matrix with zero rows or zero columns: return and leave the target unchanged.
3486  if( left.rows() == 0UL || left.columns() == 0UL ) {
3487  return;
3488  }
3489 
3490  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3491  RT x( right ); // Evaluation of the right-hand side dense vector operand
3492 
      // Sanity checks: the evaluated operands must keep the dimensions of the originals and
      // the number of matrix rows must match the target size.
3493  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3494  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3495  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3496  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3497 
      // Re-form the scaled product from the evaluated operands and hand it to smpAddAssign().
3498  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
3499  }
3500  //**********************************************************************************************
3501 
3502  //**SMP addition assignment to sparse vectors***************************************************
3503  // No special implementation for the SMP addition assignment to sparse vectors.
3504  //**********************************************************************************************
3505 
3506  //**SMP subtraction assignment to dense vectors*************************************************
      // SMP subtraction assignment of the scaled matrix/vector product rhs to the dense vector
      // lhs. The overload is only enabled through EnableIf< UseSMPAssign<VT1> >.
      //
      // lhs : target dense vector; asserted to have the same size as rhs
      // rhs : scaled product expression; its operands are read through rhs.vector_ and its
      //       scaling factor through rhs.scalar_
      //
      // NOTE(review): source line 3524 is absent from this listing.
3520  template< typename VT1 > // Type of the target dense vector
3521  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3522  smpSubAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3523  {
3525 
3526  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3527 
3528  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3529  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3530 
      // A matrix with zero rows or zero columns: return and leave the target unchanged.
3531  if( left.rows() == 0UL || left.columns() == 0UL ) {
3532  return;
3533  }
3534 
3535  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3536  RT x( right ); // Evaluation of the right-hand side dense vector operand
3537 
      // Sanity checks: the evaluated operands must keep the dimensions of the originals and
      // the number of matrix rows must match the target size.
3538  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3539  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3540  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3541  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3542 
      // Re-form the scaled product from the evaluated operands and hand it to smpSubAssign().
3543  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
3544  }
3545  //**********************************************************************************************
3546 
3547  //**SMP subtraction assignment to sparse vectors************************************************
3548  // No special implementation for the SMP subtraction assignment to sparse vectors.
3549  //**********************************************************************************************
3550 
3551  //**SMP multiplication assignment to dense vectors**********************************************
      // SMP multiplication assignment of the scaled expression rhs to the dense vector lhs.
      // The overload is only enabled through EnableIf< UseSMPAssign<VT1> >. The expression is
      // evaluated into a temporary of type ResultType, which is then passed to smpMultAssign().
      //
      // lhs : target dense vector; asserted to have the same size as rhs
      // rhs : the expression to be multiplied into the target
      //
      // NOTE(review): source lines 3570 and 3572-3574 are absent from this listing, so any
      // statements on them are not shown here.
3566  template< typename VT1 > // Type of the target dense vector
3567  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3568  smpMultAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3569  {
3571 
3575 
3576  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3577 
3578  const ResultType tmp( rhs );
3579  smpMultAssign( ~lhs, tmp );
3580  }
3581  //**********************************************************************************************
3582 
3583  //**SMP multiplication assignment to sparse vectors*********************************************
3584  // No special implementation for the SMP multiplication assignment to sparse vectors.
3585  //**********************************************************************************************
3586 
3587  //**Compile time checks*************************************************************************
3596  //**********************************************************************************************
3597 };
3599 //*************************************************************************************************
3600 
3601 
3602 
3603 
3604 //=================================================================================================
3605 //
3606 // GLOBAL BINARY ARITHMETIC OPERATORS
3607 //
3608 //=================================================================================================
3609 
3610 //*************************************************************************************************
     // Global function that builds a TDMatDVecMultExpr<T1,T2> from a matrix operand (mat) and a
     // vector operand (vec). The return type is removed from overload resolution through
     // DisableIf when IsMatMatMultExpr<T1> holds.
     //
     // Throws std::invalid_argument( "Matrix and vector sizes do not match" ) if the number of
     // matrix columns differs from the vector size.
     // Returns the expression object constructed from ~mat and ~vec.
     //
     // NOTE(review): source line 3644, which carries the function name and parameter list, is
     // absent from this listing (as is line 3646). Given the section title this is presumably
     // operator*( mat, vec ) - confirm against the original header.
3641 template< typename T1 // Type of the left-hand side dense matrix
3642  , typename T2 > // Type of the right-hand side dense vector
3643 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
3645 {
3647 
3648  if( (~mat).columns() != (~vec).size() )
3649  throw std::invalid_argument( "Matrix and vector sizes do not match" );
3650 
3651  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3652 }
3653 //*************************************************************************************************
3654 
3655 
3656 
3657 
3658 //=================================================================================================
3659 //
3660 // SIZE SPECIALIZATIONS
3661 //
3662 //=================================================================================================
3663 
3664 //*************************************************************************************************
     // Specialization of the Size type trait for TDMatDVecMultExpr<MT,VT>. It adds no members of
     // its own and inherits everything from Rows<MT>, so the size reported for the product
     // expression is whatever the Rows trait reports for the matrix type MT.
3666 template< typename MT, typename VT >
3667 struct Size< TDMatDVecMultExpr<MT,VT> >
3668  : public Rows<MT>
3669 {};
3671 //*************************************************************************************************
3672 
3673 
3674 
3675 
3676 //=================================================================================================
3677 //
3678 // EXPRESSION TRAIT SPECIALIZATIONS
3679 //
3680 //=================================================================================================
3681 
3682 //*************************************************************************************************
     // Specialization of SubvectorExprTrait for TDMatDVecMultExpr<MT,VT>. The nested Type is
     // computed by first applying SubmatrixExprTrait to the (const) matrix type and then
     // applying MultExprTrait to that result and the vector type VT.
     //
     // AF is forwarded unchanged to SubmatrixExprTrait.
     // NOTE(review): AF is presumably the alignment flag of the subvector - confirm.
3684 template< typename MT, typename VT, bool AF >
3685 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT>, AF >
3686 {
3687  public:
3688  //**********************************************************************************************
3689  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
3690  //**********************************************************************************************
3691 };
3693 //*************************************************************************************************
3694 
3695 } // namespace blaze
3696 
3697 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:264
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:114
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:258
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:288
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
Header file for the DenseVector base class.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:251
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:404
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:126
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:259
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:261
Header file for the multiplication trait.
Header file for the IsDouble type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:405
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:113
Header file for the IsBlasCompatible type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:340
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDMatDVecMultExpr.h:406
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:254
Constraints on the storage order of matrix types.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:255
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:267
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:330
Header file for the IsNumeric type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:374
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:115
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:250
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:253
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:362
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold. This threshold specifies when a c...
Definition: Thresholds.h:345
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:116
Header file for all intrinsic functionality.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:252
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:303
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:249
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
Header file for basic type definitions.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:118
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:394
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:384
Constraint on the data type.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:350
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:117
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849