All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
65 #include <blaze/system/BLAS.h>
67 #include <blaze/util/Assert.h>
68 #include <blaze/util/Complex.h>
74 #include <blaze/util/DisableIf.h>
75 #include <blaze/util/EnableIf.h>
77 #include <blaze/util/SelectType.h>
78 #include <blaze/util/Types.h>
84 
85 
86 namespace blaze {
87 
88 //=================================================================================================
89 //
90 // CLASS DMATDVECMULTEXPR
91 //
92 //=================================================================================================
93 
94 //*************************************************************************************************
101 template< typename MT // Type of the left-hand side dense matrix
102  , typename VT > // Type of the right-hand side dense vector
103 class DMatDVecMultExpr : public DenseVector< DMatDVecMultExpr<MT,VT>, false >
104  , private MatVecMultExpr
105  , private Computation
106 {
107  private:
108  //**Type definitions****************************************************************************
// Private shorthand typedefs for types exported by the operand types MT (matrix) and VT (vector).
109  typedef typename MT::ResultType MRT;  // Result type of the matrix operand
110  typedef typename VT::ResultType VRT;  // Result type of the vector operand
111  typedef typename MRT::ElementType MET;  // Element type of the matrix result type
112  typedef typename VRT::ElementType VET;  // Element type of the vector result type
113  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand
114  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand
115  //**********************************************************************************************
116 
117  //**********************************************************************************************
// Compile-time switch evaluateMatrix. The visible part of the condition requires MT to be a
// computation expression whose type is not vectorizable.
// NOTE(review): the initializer ends in '&&' here; the remainder of the condition (and the
// closing of the enum) is on a line elided from this listing, so the full rule cannot be
// documented from this view.
119  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
121  //**********************************************************************************************
122 
123  //**********************************************************************************************
// Compile-time switch evaluateVector: nonzero when the vector operand type VT is a computation
// expression (IsComputation<VT>).
125  enum { evaluateVector = IsComputation<VT>::value };
126  //**********************************************************************************************
127 
128  //**********************************************************************************************
130 
// Compile-time predicate for the single precision BLAS kernel. 'value' is nonzero only when
// T1, T2 and T3 are all vectorizable and each of their element types satisfies IsFloat.
// Within this class it gates the cblas_sgemv based selectBlas*Kernel overloads, which are
// instantiated with (target vector, matrix operand, vector operand).
134  template< typename T1, typename T2, typename T3 >
135  struct UseSinglePrecisionKernel {
136  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
137  IsFloat<typename T1::ElementType>::value &&
138  IsFloat<typename T2::ElementType>::value &&
139  IsFloat<typename T3::ElementType>::value };
140  };
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146 
// Compile-time predicate for the double precision BLAS kernel. 'value' is nonzero only when
// T1, T2 and T3 are all vectorizable and each of their element types satisfies IsDouble.
// Within this class it gates the cblas_dgemv based selectBlas*Kernel overloads.
150  template< typename T1, typename T2, typename T3 >
151  struct UseDoublePrecisionKernel {
152  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
153  IsDouble<typename T1::ElementType>::value &&
154  IsDouble<typename T2::ElementType>::value &&
155  IsDouble<typename T3::ElementType>::value };
156  };
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
// Compile-time predicate for the single precision complex BLAS kernel. 'value' is nonzero only
// when T1, T2 and T3 are all vectorizable and every element type is exactly complex<float>.
// Within this class it gates the cblas_cgemv based selectBlas*Kernel overloads.
166  template< typename T1, typename T2, typename T3 >
167  struct UseSinglePrecisionComplexKernel {
168  typedef complex<float> Type;  // The element type all three arguments must share
169  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  IsSame<typename T1::ElementType,Type>::value &&
171  IsSame<typename T2::ElementType,Type>::value &&
172  IsSame<typename T3::ElementType,Type>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
// Compile-time predicate for the double precision complex BLAS kernel. 'value' is nonzero only
// when T1, T2 and T3 are all vectorizable and every element type is exactly complex<double>.
// Within this class it gates the cblas_zgemv based selectBlas*Kernel overloads.
183  template< typename T1, typename T2, typename T3 >
184  struct UseDoublePrecisionComplexKernel {
185  typedef complex<double> Type;  // The element type all three arguments must share
186  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  IsSame<typename T1::ElementType,Type>::value &&
188  IsSame<typename T2::ElementType,Type>::value &&
189  IsSame<typename T3::ElementType,Type>::value };
190  };
192  //**********************************************************************************************
193 
194  //**********************************************************************************************
196 
// Compile-time predicate for the fallback ("default") kernel. 'value' is nonzero when
// BLAZE_BLAS_MODE is zero, or when none of the four precision-specific predicates
// (single, double, single complex, double complex) holds for the given type triple.
// It gates the selectBlas*Kernel overloads that forward to the default kernels.
199  template< typename T1, typename T2, typename T3 >
200  struct UseDefaultKernel {
201  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
202  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
203  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
204  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
// Compile-time predicate that chooses between the two selectDefault*Kernel overloads.
// 'value' is nonzero only when T1, T2 and T3 are all vectorizable, all three share the same
// element type, and IntrinsicTrait reports both 'addition' and 'multiplication' for that
// element type. When it holds the intrinsics-based kernel is enabled, otherwise the
// expression-based one.
215  template< typename T1, typename T2, typename T3 >
216  struct UseVectorizedDefaultKernel {
217  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
218  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
219  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
220  IntrinsicTrait<typename T1::ElementType>::addition &&
221  IntrinsicTrait<typename T1::ElementType>::multiplication };
222  };
224  //**********************************************************************************************
225 
226  public:
227  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are used below but are declared on lines that are
// not part of this listing.
// Return type of the subscript operator: a const ElementType (returned by value).
233  typedef const ElementType ReturnType;
// Composite type of this expression: a const ResultType.
234  typedef const ResultType CompositeType;
235 
// Type of the stored matrix operand, as returned by leftOperand(). SelectType picks between a
// by-value 'const MT' and a reference 'const MT&' based on IsExpression<MT>
// (presumably by value when MT is itself an expression -- confirm against SelectType).
237  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
238 
// Type of the stored vector operand, as returned by rightOperand(); chosen the same way as
// LeftOperand, based on IsExpression<VT>.
240  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
241 
244 
247  //**********************************************************************************************
248 
249  //**Compilation flags***************************************************************************
// Compilation flag: this expression type reports itself as not vectorizable (value 0). The
// helper predicates in this class read the same-named flag from their type arguments.
251  enum { vectorizable = 0 };
252 
// Compilation flag: set to 0 for this expression type.
// NOTE(review): presumably marks the expression as not assignable in parallel (SMP) -- confirm.
254  enum { smpAssignable = 0 };
255  //**********************************************************************************************
256 
257  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//   mat: left-hand side dense matrix operand
//   vec: right-hand side dense vector operand
// end_ is precomputed for the pairwise loop in operator[]: for at least one column it equals
// the largest odd number that does not exceed mat.columns() (columns() itself when odd,
// columns()-1 when even). For zero columns the unsigned subtraction wraps around; operator[]
// never uses end_ in that case because it checks columns() != 0 first.
// The internal assertion requires the matrix column count to match the vector size.
263  explicit inline DMatDVecMultExpr( const MT& mat, const VT& vec )
264  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
265  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
266  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
267  {
268  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
269  }
270  //**********************************************************************************************
271 
272  //**Subscript operator**************************************************************************
// Computes a single element of the product on demand: the dot product of matrix row 'index'
// with the vector operand.
//   index: row of the matrix / element of the result; asserted to be < mat_.rows()
//   returns: the computed element by value
278  inline ReturnType operator[]( size_t index ) const {
279  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
280 
281  ElementType res;
282 
283  if( mat_.columns() != 0UL ) {
// Seed the sum with column 0, then consume the remaining columns two at a time.
284  res = mat_(index,0UL) * vec_[0UL];
285  for( size_t j=1UL; j<end_; j+=2UL ) {
286  res += mat_(index,j) * vec_[j] + mat_(index,j+1UL) * vec_[j+1UL];
287  }
// With an even column count one trailing column (at index end_) is left over.
288  if( end_ < mat_.columns() ) {
289  res += mat_(index,end_) * vec_[end_];
290  }
291  }
292  else {
// No columns: nothing to sum, so the result is put into its reset state.
293  reset( res );
294  }
295 
296  return res;
297  }
298  //**********************************************************************************************
299 
300  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of rows of the matrix operand.
305  inline size_t size() const {
306  return mat_.rows();
307  }
308  //**********************************************************************************************
309 
310  //**Left operand access*************************************************************************
// Returns the left-hand side dense matrix operand (mat_) as LeftOperand.
315  inline LeftOperand leftOperand() const {
316  return mat_;
317  }
318  //**********************************************************************************************
319 
320  //**Right operand access************************************************************************
// Returns the right-hand side dense vector operand (vec_) as RightOperand.
325  inline RightOperand rightOperand() const {
326  return vec_;
327  }
328  //**********************************************************************************************
329 
330  //**********************************************************************************************
// Reports whether this expression can alias with the object at the given address.
//   alias: address to check
//   returns: true if either operand reports isAliased( alias ), false otherwise
// Note that the check performed here is the same one isAliased() performs.
336  template< typename T >
337  inline bool canAlias( const T* alias ) const {
338  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
339  }
340  //**********************************************************************************************
341 
342  //**********************************************************************************************
// Reports whether this expression is aliased with the object at the given address.
//   alias: address to check
//   returns: true if the matrix operand or the vector operand reports isAliased( alias )
348  template< typename T >
349  inline bool isAliased( const T* alias ) const {
350  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
351  }
352  //**********************************************************************************************
353 
354  private:
355  //**Member variables****************************************************************************
// Upper bound of the two-columns-per-step loop in operator[], computed once in the constructor.
// NOTE(review): the declarations of the operand members mat_ and vec_ are on lines not shown
// in this listing.
358  const size_t end_;
359  //**********************************************************************************************
360 
361  //**Assignment to dense vectors*****************************************************************
// Assignment of the matrix-vector product to a dense vector ( lhs = A * x ).
//   lhs: target dense vector; asserted to have the same size as rhs
//   rhs: the multiplication expression to evaluate
// An empty result (no rows) is a no-op; a matrix without columns resets the target.
// Otherwise both operands are materialized as LT / RT and a kernel is chosen at run time.
// NOTE(review): LT and RT are declared on lines not shown in this listing, and '~lhs' is
// presumably the conversion from DenseVector<VT1,false> to the concrete VT1 -- confirm.
374  template< typename VT1 > // Type of the target dense vector
375  friend inline void assign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
376  {
378 
379  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
380 
381  if( rhs.mat_.rows() == 0UL ) {
382  return;
383  }
384  else if( rhs.mat_.columns() == 0UL ) {
385  reset( ~lhs );
386  return;
387  }
388 
389  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
390  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
391 
392  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
393  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
394  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
395  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
396 
// Use the default kernel when the matrix is a computation that was not evaluated, or when the
// element count rows*columns is below DMATDVECMULT_THRESHOLD; otherwise take the BLAS path
// (which itself falls back to the default kernel when no BLAS kernel applies).
397  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
398  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
399  DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
400  else
401  DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
402  }
404  //**********************************************************************************************
405 
406  //**Default assignment to dense vectors*********************************************************
// Default assignment kernel ( y = A * x ), non-vectorized variant.
// Enabled only when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold.
//   y: target vector, A: matrix operand, x: vector operand
// Delegates to the target's own assign() with the product expression A * x.
420  template< typename VT1 // Type of the left-hand side target vector
421  , typename MT1 // Type of the left-hand side matrix operand
422  , typename VT2 > // Type of the right-hand side vector operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
424  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
425  {
426  y.assign( A * x );
427  }
429  //**********************************************************************************************
430 
431  //**Vectorized default assignment to dense vectors**********************************************
// Default assignment kernel ( y = A * x ), intrinsics-based variant.
// Enabled only when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
//   y: target vector, A: matrix operand, x: vector operand
// Rows are processed in blocks of 8, then 4, 3, 2 and finally a single remaining row. For
// each block the columns are traversed in steps of IT::size; every row keeps its own
// accumulator, which is reduced with sum() and stored into y.
// NOTE(review): the accumulators are declared without an initializer, so this relies on
// IntrinsicType default-constructing to zero -- confirm. IntrinsicType itself is declared on
// a line not shown in this listing.
// NOTE(review): the column loop advances by IT::size until j >= N with no remainder handling,
// which presumably relies on the operands' storage being padded to a multiple of IT::size --
// confirm.
445  template< typename VT1 // Type of the left-hand side target vector
446  , typename MT1 // Type of the left-hand side matrix operand
447  , typename VT2 > // Type of the right-hand side vector operand
448  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
449  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
450  {
451  typedef IntrinsicTrait<ElementType> IT;
452 
453  const size_t M( A.rows() );
454  const size_t N( A.columns() );
455 
456  size_t i( 0UL );
457 
// Blocks of eight rows: each load of x is shared by eight row accumulators.
458  for( ; (i+8UL) <= M; i+=8UL ) {
459  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
460  for( size_t j=0UL; j<N; j+=IT::size ) {
461  const IntrinsicType x1( x.load(j) );
462  xmm1 = xmm1 + A.load(i ,j) * x1;
463  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
464  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
465  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
466  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
467  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
468  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
469  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
470  }
471  y[i ] = sum( xmm1 );
472  y[i+1UL] = sum( xmm2 );
473  y[i+2UL] = sum( xmm3 );
474  y[i+3UL] = sum( xmm4 );
475  y[i+4UL] = sum( xmm5 );
476  y[i+5UL] = sum( xmm6 );
477  y[i+6UL] = sum( xmm7 );
478  y[i+7UL] = sum( xmm8 );
479  }
// Fewer than eight rows remain from here on, so each of the following blocks runs at most once.
480  for( ; (i+4UL) <= M; i+=4UL ) {
481  IntrinsicType xmm1, xmm2, xmm3, xmm4;
482  for( size_t j=0UL; j<N; j+=IT::size ) {
483  const IntrinsicType x1( x.load(j) );
484  xmm1 = xmm1 + A.load(i ,j) * x1;
485  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
486  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
487  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
488  }
489  y[i ] = sum( xmm1 );
490  y[i+1UL] = sum( xmm2 );
491  y[i+2UL] = sum( xmm3 );
492  y[i+3UL] = sum( xmm4 );
493  }
// Block of three rows.
494  for( ; (i+3UL) <= M; i+=3UL ) {
495  IntrinsicType xmm1, xmm2, xmm3;
496  for( size_t j=0UL; j<N; j+=IT::size ) {
497  const IntrinsicType x1( x.load(j) );
498  xmm1 = xmm1 + A.load(i ,j) * x1;
499  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
500  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
501  }
502  y[i ] = sum( xmm1 );
503  y[i+1UL] = sum( xmm2 );
504  y[i+2UL] = sum( xmm3 );
505  }
// Block of two rows.
506  for( ; (i+2UL) <= M; i+=2UL ) {
507  IntrinsicType xmm1, xmm2;
508  for( size_t j=0UL; j<N; j+=IT::size ) {
509  const IntrinsicType x1( x.load(j) );
510  xmm1 = xmm1 + A.load(i ,j) * x1;
511  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
512  }
513  y[i ] = sum( xmm1 );
514  y[i+1UL] = sum( xmm2 );
515  }
// Single remaining row, if any.
516  if( i < M ) {
517  IntrinsicType xmm1;
518  for( size_t j=0UL; j<N; j+=IT::size ) {
519  xmm1 = xmm1 + A.load(i,j) * x.load(j);
520  }
521  y[i] = sum( xmm1 );
522  }
523  }
525  //**********************************************************************************************
526 
527  //**BLAS-based assignment to dense vectors (default)********************************************
// BLAS assignment kernel, fallback variant ( y = A * x ).
// Enabled when UseDefaultKernel<VT1,MT1,VT2> holds, i.e. when BLAS mode is off or no
// precision-specific BLAS kernel matches; it simply forwards to selectDefaultAssignKernel.
//   y: target vector, A: matrix operand, x: vector operand
541  template< typename VT1 // Type of the left-hand side target vector
542  , typename MT1 // Type of the left-hand side matrix operand
543  , typename VT2 > // Type of the right-hand side vector operand
544  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
545  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
546  {
547  selectDefaultAssignKernel( y, A, x );
548  }
550  //**********************************************************************************************
551 
552  //**BLAS-based assignment to dense vectors (single precision)***********************************
553 #if BLAZE_BLAS_MODE
554 
// BLAS assignment kernel for single precision operands ( y = A * x ).
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The dimensions and the row spacing of A are converted to int via boost::numeric_cast
// before being handed to cblas_sgemv.
567  template< typename VT1 // Type of the left-hand side target vector
568  , typename MT1 // Type of the left-hand side matrix operand
569  , typename VT2 > // Type of the right-hand side vector operand
570  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
571  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
572  {
573  using boost::numeric_cast;
574 
578 
579  const int M ( numeric_cast<int>( A.rows() ) );
580  const int N ( numeric_cast<int>( A.columns() ) );
581  const int lda( numeric_cast<int>( A.spacing() ) );
582 
// Row-major, non-transposed gemv with alpha = 1 and beta = 0: y is overwritten with A * x.
583  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
584  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
585  }
587 #endif
588  //**********************************************************************************************
589 
590  //**BLAS-based assignment to dense vectors (double precision)***********************************
591 #if BLAZE_BLAS_MODE
592 
// BLAS assignment kernel for double precision operands ( y = A * x ).
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The dimensions and the row spacing of A are converted to int via boost::numeric_cast
// before being handed to cblas_dgemv.
605  template< typename VT1 // Type of the left-hand side target vector
606  , typename MT1 // Type of the left-hand side matrix operand
607  , typename VT2 > // Type of the right-hand side vector operand
608  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
609  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
610  {
611  using boost::numeric_cast;
612 
616 
617  const int M ( numeric_cast<int>( A.rows() ) );
618  const int N ( numeric_cast<int>( A.columns() ) );
619  const int lda( numeric_cast<int>( A.spacing() ) );
620 
// Row-major, non-transposed gemv with alpha = 1 and beta = 0: y is overwritten with A * x.
621  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
622  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
623  }
625 #endif
626  //**********************************************************************************************
627 
628  //**BLAS-based assignment to dense vectors (single precision complex)***************************
629 #if BLAZE_BLAS_MODE
630 
// BLAS assignment kernel for single precision complex operands ( y = A * x ).
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The compile-time constraints require the value_type of every complex element type to be
// float. The scalars are passed to cblas_cgemv by address.
643  template< typename VT1 // Type of the left-hand side target vector
644  , typename MT1 // Type of the left-hand side matrix operand
645  , typename VT2 > // Type of the right-hand side vector operand
646  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
647  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
648  {
649  using boost::numeric_cast;
650 
654  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
655  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
656  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
657 
658  const int M ( numeric_cast<int>( A.rows() ) );
659  const int N ( numeric_cast<int>( A.columns() ) );
660  const int lda( numeric_cast<int>( A.spacing() ) );
661  const complex<float> alpha( 1.0F, 0.0F );  // alpha = 1: unscaled product
662  const complex<float> beta ( 0.0F, 0.0F );  // beta = 0: previous contents of y are discarded
663 
664  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
665  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
666  }
668 #endif
669  //**********************************************************************************************
670 
671  //**BLAS-based assignment to dense vectors (double precision complex)***************************
672 #if BLAZE_BLAS_MODE
673 
// BLAS assignment kernel for double precision complex operands ( y = A * x ).
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The compile-time constraints require the value_type of every complex element type to be
// double. The scalars are passed to cblas_zgemv by address.
686  template< typename VT1 // Type of the left-hand side target vector
687  , typename MT1 // Type of the left-hand side matrix operand
688  , typename VT2 > // Type of the right-hand side vector operand
689  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
690  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
691  {
692  using boost::numeric_cast;
693 
697  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
700 
701  const int M ( numeric_cast<int>( A.rows() ) );
702  const int N ( numeric_cast<int>( A.columns() ) );
703  const int lda( numeric_cast<int>( A.spacing() ) );
704  const complex<double> alpha( 1.0, 0.0 );  // alpha = 1: unscaled product
705  const complex<double> beta ( 0.0, 0.0 );  // beta = 0: previous contents of y are discarded
706 
707  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
708  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
709  }
711 #endif
712  //**********************************************************************************************
713 
714  //**Assignment to sparse vectors****************************************************************
// Assignment of the matrix-vector product to a sparse vector.
//   lhs: target sparse vector; asserted to have the same size as rhs
//   rhs: the multiplication expression to evaluate
// The expression is first evaluated into a temporary of type ResultType, and that temporary
// is then assigned to the sparse target.
727  template< typename VT1 > // Type of the target sparse vector
728  friend inline void assign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
729  {
731 
735 
736  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
737 
738  const ResultType tmp( rhs );
739  assign( ~lhs, tmp );
740  }
742  //**********************************************************************************************
743 
744  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the matrix-vector product to a dense vector ( lhs += A * x ).
//   lhs: target dense vector; asserted to have the same size as rhs
//   rhs: the multiplication expression to evaluate
// If the matrix has no rows or no columns there is nothing to add and the target is left
// untouched. Otherwise both operands are materialized as LT / RT and a kernel is chosen at
// run time.
// NOTE(review): LT and RT are declared on lines not shown in this listing.
757  template< typename VT1 > // Type of the target dense vector
758  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
759  {
761 
762  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
763 
764  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
765  return;
766  }
767 
768  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
769  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
770 
771  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
772  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
773  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
774  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
775 
// Use the default kernel when the matrix is a computation that was not evaluated, or when the
// element count rows*columns is below DMATDVECMULT_THRESHOLD; otherwise take the BLAS path.
776  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
777  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
778  DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
779  else
780  DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
781  }
783  //**********************************************************************************************
784 
785  //**Default addition assignment to dense vectors************************************************
// Default addition assignment kernel ( y += A * x ), non-vectorized variant.
// Enabled only when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold.
//   y: target vector, A: matrix operand, x: vector operand
// Delegates to the target's own addAssign() with the product expression A * x.
799  template< typename VT1 // Type of the left-hand side target vector
800  , typename MT1 // Type of the left-hand side matrix operand
801  , typename VT2 > // Type of the right-hand side vector operand
802  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
803  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
804  {
805  y.addAssign( A * x );
806  }
808  //**********************************************************************************************
809 
810  //**Vectorized default addition assignment to dense vectors*************************************
// Default addition assignment kernel ( y += A * x ), intrinsics-based variant.
// Enabled only when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
//   y: target vector, A: matrix operand, x: vector operand
// Rows are processed in blocks of 8, then 4, 3, 2 and finally a single remaining row. For
// each block the columns are traversed in steps of IT::size; every row keeps its own
// accumulator, which is reduced with sum() and added onto the existing value of y.
// NOTE(review): the accumulators are declared without an initializer, so this relies on
// IntrinsicType default-constructing to zero -- confirm. IntrinsicType itself is declared on
// a line not shown in this listing.
// NOTE(review): the column loop advances by IT::size until j >= N with no remainder handling,
// which presumably relies on the operands' storage being padded to a multiple of IT::size --
// confirm.
824  template< typename VT1 // Type of the left-hand side target vector
825  , typename MT1 // Type of the left-hand side matrix operand
826  , typename VT2 > // Type of the right-hand side vector operand
827  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
828  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
829  {
830  typedef IntrinsicTrait<ElementType> IT;
831 
832  const size_t M( A.rows() );
833  const size_t N( A.columns() );
834 
835  size_t i( 0UL );
836 
// Blocks of eight rows: each load of x is shared by eight row accumulators.
837  for( ; (i+8UL) <= M; i+=8UL ) {
838  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
839  for( size_t j=0UL; j<N; j+=IT::size ) {
840  const IntrinsicType x1( x.load(j) );
841  xmm1 = xmm1 + A.load(i ,j) * x1;
842  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
843  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
844  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
845  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
846  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
847  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
848  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
849  }
850  y[i ] += sum( xmm1 );
851  y[i+1UL] += sum( xmm2 );
852  y[i+2UL] += sum( xmm3 );
853  y[i+3UL] += sum( xmm4 );
854  y[i+4UL] += sum( xmm5 );
855  y[i+5UL] += sum( xmm6 );
856  y[i+6UL] += sum( xmm7 );
857  y[i+7UL] += sum( xmm8 );
858  }
// Fewer than eight rows remain from here on, so each of the following blocks runs at most once.
859  for( ; (i+4UL) <= M; i+=4UL ) {
860  IntrinsicType xmm1, xmm2, xmm3, xmm4;
861  for( size_t j=0UL; j<N; j+=IT::size ) {
862  const IntrinsicType x1( x.load(j) );
863  xmm1 = xmm1 + A.load(i ,j) * x1;
864  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
865  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
866  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
867  }
868  y[i ] += sum( xmm1 );
869  y[i+1UL] += sum( xmm2 );
870  y[i+2UL] += sum( xmm3 );
871  y[i+3UL] += sum( xmm4 );
872  }
// Block of three rows.
873  for( ; (i+3UL) <= M; i+=3UL ) {
874  IntrinsicType xmm1, xmm2, xmm3;
875  for( size_t j=0UL; j<N; j+=IT::size ) {
876  const IntrinsicType x1( x.load(j) );
877  xmm1 = xmm1 + A.load(i ,j) * x1;
878  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
879  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
880  }
881  y[i ] += sum( xmm1 );
882  y[i+1UL] += sum( xmm2 );
883  y[i+2UL] += sum( xmm3 );
884  }
// Block of two rows.
885  for( ; (i+2UL) <= M; i+=2UL ) {
886  IntrinsicType xmm1, xmm2;
887  for( size_t j=0UL; j<N; j+=IT::size ) {
888  const IntrinsicType x1( x.load(j) );
889  xmm1 = xmm1 + A.load(i ,j) * x1;
890  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
891  }
892  y[i ] += sum( xmm1 );
893  y[i+1UL] += sum( xmm2 );
894  }
// Single remaining row, if any.
895  if( i < M ) {
896  IntrinsicType xmm1;
897  for( size_t j=0UL; j<N; j+=IT::size ) {
898  xmm1 = xmm1 + A.load(i,j) * x.load(j);
899  }
900  y[i] += sum( xmm1 );
901  }
902  }
904  //**********************************************************************************************
905 
906  //**BLAS-based addition assignment to dense vectors (default)***********************************
// BLAS addition assignment kernel, fallback variant ( y += A * x ).
// Enabled when UseDefaultKernel<VT1,MT1,VT2> holds, i.e. when BLAS mode is off or no
// precision-specific BLAS kernel matches; it simply forwards to selectDefaultAddAssignKernel.
//   y: target vector, A: matrix operand, x: vector operand
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename MT1 // Type of the left-hand side matrix operand
922  , typename VT2 > // Type of the right-hand side vector operand
923  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
924  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
925  {
926  selectDefaultAddAssignKernel( y, A, x );
927  }
929  //**********************************************************************************************
930 
931  //**BLAS-based addition assignment to dense vectors (single precision)**************************
932 #if BLAZE_BLAS_MODE
933 
// BLAS addition assignment kernel for single precision operands ( y += A * x ).
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The dimensions and the row spacing of A are converted to int via boost::numeric_cast
// before being handed to cblas_sgemv.
946  template< typename VT1 // Type of the left-hand side target vector
947  , typename MT1 // Type of the left-hand side matrix operand
948  , typename VT2 > // Type of the right-hand side vector operand
949  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
950  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
951  {
952  using boost::numeric_cast;
953 
957 
958  const int M ( numeric_cast<int>( A.rows() ) );
959  const int N ( numeric_cast<int>( A.columns() ) );
960  const int lda( numeric_cast<int>( A.spacing() ) );
961 
// Row-major, non-transposed gemv with alpha = 1 and beta = 1: A * x is added onto y.
962  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
963  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
964  }
966 #endif
967  //**********************************************************************************************
968 
969  //**BLAS-based addition assignment to dense vectors (double precision)**************************
970 #if BLAZE_BLAS_MODE
971 
// BLAS addition assignment kernel for double precision operands ( y += A * x ).
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The dimensions and the row spacing of A are converted to int via boost::numeric_cast
// before being handed to cblas_dgemv.
984  template< typename VT1 // Type of the left-hand side target vector
985  , typename MT1 // Type of the left-hand side matrix operand
986  , typename VT2 > // Type of the right-hand side vector operand
987  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
988  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
989  {
990  using boost::numeric_cast;
991 
995 
996  const int M ( numeric_cast<int>( A.rows() ) );
997  const int N ( numeric_cast<int>( A.columns() ) );
998  const int lda( numeric_cast<int>( A.spacing() ) );
999 
// Row-major, non-transposed gemv with alpha = 1 and beta = 1: A * x is added onto y.
1000  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
1001  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1002  }
1004 #endif
1005  //**********************************************************************************************
1006 
1007  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1008 #if BLAZE_BLAS_MODE
1009 
// BLAS addition assignment kernel for single precision complex operands ( y += A * x ).
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The compile-time constraints require the value_type of every complex element type to be
// float. The scalars are passed to cblas_cgemv by address.
1022  template< typename VT1 // Type of the left-hand side target vector
1023  , typename MT1 // Type of the left-hand side matrix operand
1024  , typename VT2 > // Type of the right-hand side vector operand
1025  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1026  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1027  {
1028  using boost::numeric_cast;
1029 
1033  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1034  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1035  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1036 
1037  const int M ( numeric_cast<int>( A.rows() ) );
1038  const int N ( numeric_cast<int>( A.columns() ) );
1039  const int lda( numeric_cast<int>( A.spacing() ) );
1040  const complex<float> alpha( 1.0F, 0.0F );  // alpha = 1: unscaled product
1041  const complex<float> beta ( 1.0F, 0.0F );  // beta = 1: the product is added onto y
1042 
1043  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1044  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1045  }
1047 #endif
1048  //**********************************************************************************************
1049 
1050  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1051 #if BLAZE_BLAS_MODE
1052 
// BLAS addition assignment kernel for double precision complex operands ( y += A * x ).
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds; compiled only in BLAS mode.
//   y: target vector, A: matrix operand, x: vector operand
// The compile-time constraints require the value_type of every complex element type to be
// double. The scalars are passed to cblas_zgemv by address.
1065  template< typename VT1 // Type of the left-hand side target vector
1066  , typename MT1 // Type of the left-hand side matrix operand
1067  , typename VT2 > // Type of the right-hand side vector operand
1068  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1069  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1070  {
1071  using boost::numeric_cast;
1072 
1076  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1077  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1078  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1079 
1080  const int M ( numeric_cast<int>( A.rows() ) );
1081  const int N ( numeric_cast<int>( A.columns() ) );
1082  const int lda( numeric_cast<int>( A.spacing() ) );
1083  const complex<double> alpha( 1.0, 0.0 );  // alpha = 1: unscaled product
1084  const complex<double> beta ( 1.0, 0.0 );  // beta = 1: the product is added onto y
1085 
1086  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1087  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1088  }
1090 #endif
1091  //**********************************************************************************************
1092 
1093  //**Addition assignment to sparse vectors*******************************************************
1094  // No special implementation for the addition assignment to sparse vectors.
1095  //**********************************************************************************************
1096 
1097  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the matrix-vector product to a dense vector ( lhs -= A * x ).
//   lhs: target dense vector; asserted to have the same size as rhs
//   rhs: the multiplication expression to evaluate
// If the matrix has no rows or no columns there is nothing to subtract and the target is left
// untouched. Otherwise both operands are materialized as LT / RT and a kernel is chosen at
// run time.
// NOTE(review): LT and RT are declared on lines not shown in this listing.
1110  template< typename VT1 > // Type of the target dense vector
1111  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1112  {
1114 
1115  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1116 
1117  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1118  return;
1119  }
1120 
1121  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1122  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1123 
1124  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1125  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1126  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1127  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1128 
// Use the default kernel when the matrix is a computation that was not evaluated, or when the
// element count rows*columns is below DMATDVECMULT_THRESHOLD; otherwise take the BLAS path.
1129  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1130  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1131  DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1132  else
1133  DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
1134  }
1136  //**********************************************************************************************
1137 
1138  //**Default subtraction assignment to dense vectors*********************************************
1152  template< typename VT1 // Type of the left-hand side target vector
1153  , typename MT1 // Type of the left-hand side matrix operand
1154  , typename VT2 > // Type of the right-hand side vector operand
1155  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1156  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1157  {
1158  y.subAssign( A * x );
1159  }
1161  //**********************************************************************************************
1162 
1163  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for y -= A * x.
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2>
// is true (EnableIf). Rows of A are processed in groups of 8, then 4, 3, 2 and
// finally a single remaining row. For every row an intrinsic accumulator
// collects A.load(i,j) * x.load(j) over the columns, is reduced with sum(),
// and the result is subtracted from the matching element of y.
//
// NOTE(review): the accumulators are default-constructed and never cleared
// explicitly; this presumably relies on IntrinsicType's default constructor
// zero-initializing the register - confirm.
// NOTE(review): j advances in steps of IT::size up to N, so the last load can
// reach past column N when N is not a multiple of IT::size; presumably the
// operands' storage is padded accordingly - confirm.
1177  template< typename VT1 // Type of the left-hand side target vector
1178  , typename MT1 // Type of the left-hand side matrix operand
1179  , typename VT2 > // Type of the right-hand side vector operand
1180  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1181  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1182  {
1183  typedef IntrinsicTrait<ElementType> IT;
1184 
1185  const size_t M( A.rows() );
1186  const size_t N( A.columns() );
1187 
1188  size_t i( 0UL );
1189 
// Blocks of eight rows: each x.load(j) is shared by eight row accumulators.
1190  for( ; (i+8UL) <= M; i+=8UL ) {
1191  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1192  for( size_t j=0UL; j<N; j+=IT::size ) {
1193  const IntrinsicType x1( x.load(j) );
1194  xmm1 = xmm1 + A.load(i ,j) * x1;
1195  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1196  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1197  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1198  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1199  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1200  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1201  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
1202  }
1203  y[i ] -= sum( xmm1 );
1204  y[i+1UL] -= sum( xmm2 );
1205  y[i+2UL] -= sum( xmm3 );
1206  y[i+3UL] -= sum( xmm4 );
1207  y[i+4UL] -= sum( xmm5 );
1208  y[i+5UL] -= sum( xmm6 );
1209  y[i+6UL] -= sum( xmm7 );
1210  y[i+7UL] -= sum( xmm8 );
1211  }
// Blocks of four rows (at most one iteration after the eight-row loop).
1212  for( ; (i+4UL) <= M; i+=4UL ) {
1213  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1214  for( size_t j=0UL; j<N; j+=IT::size ) {
1215  const IntrinsicType x1( x.load(j) );
1216  xmm1 = xmm1 + A.load(i ,j) * x1;
1217  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1218  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1219  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1220  }
1221  y[i ] -= sum( xmm1 );
1222  y[i+1UL] -= sum( xmm2 );
1223  y[i+2UL] -= sum( xmm3 );
1224  y[i+3UL] -= sum( xmm4 );
1225  }
// Block of three rows.
1226  for( ; (i+3UL) <= M; i+=3UL ) {
1227  IntrinsicType xmm1, xmm2, xmm3;
1228  for( size_t j=0UL; j<N; j+=IT::size ) {
1229  const IntrinsicType x1( x.load(j) );
1230  xmm1 = xmm1 + A.load(i ,j) * x1;
1231  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1232  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1233  }
1234  y[i ] -= sum( xmm1 );
1235  y[i+1UL] -= sum( xmm2 );
1236  y[i+2UL] -= sum( xmm3 );
1237  }
// Block of two rows.
1238  for( ; (i+2UL) <= M; i+=2UL ) {
1239  IntrinsicType xmm1, xmm2;
1240  for( size_t j=0UL; j<N; j+=IT::size ) {
1241  const IntrinsicType x1( x.load(j) );
1242  xmm1 = xmm1 + A.load(i ,j) * x1;
1243  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1244  }
1245  y[i ] -= sum( xmm1 );
1246  y[i+1UL] -= sum( xmm2 );
1247  }
// Single remaining row, if any.
1248  if( i < M ) {
1249  IntrinsicType xmm1;
1250  for( size_t j=0UL; j<N; j+=IT::size ) {
1251  xmm1 = xmm1 + A.load(i,j) * x.load(j);
1252  }
1253  y[i] -= sum( xmm1 );
1254  }
1255  }
1257  //**********************************************************************************************
1258 
1259  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1273  template< typename VT1 // Type of the left-hand side target vector
1274  , typename MT1 // Type of the left-hand side matrix operand
1275  , typename VT2 > // Type of the right-hand side vector operand
1276  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1277  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1278  {
1279  selectDefaultSubAssignKernel( y, A, x );
1280  }
1282  //**********************************************************************************************
1283 
1284  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1285 #if BLAZE_BLAS_MODE
1286 
1299  template< typename VT1 // Type of the left-hand side target vector
1300  , typename MT1 // Type of the left-hand side matrix operand
1301  , typename VT2 > // Type of the right-hand side vector operand
1302  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1303  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1304  {
1305  using boost::numeric_cast;
1306 
1310 
1311  const int M ( numeric_cast<int>( A.rows() ) );
1312  const int N ( numeric_cast<int>( A.columns() ) );
1313  const int lda( numeric_cast<int>( A.spacing() ) );
1314 
1315  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1316  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1317  }
1319 #endif
1320  //**********************************************************************************************
1321 
1322  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1323 #if BLAZE_BLAS_MODE
1324 
1337  template< typename VT1 // Type of the left-hand side target vector
1338  , typename MT1 // Type of the left-hand side matrix operand
1339  , typename VT2 > // Type of the right-hand side vector operand
1340  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1341  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1342  {
1343  using boost::numeric_cast;
1344 
1348 
1349  const int M ( numeric_cast<int>( A.rows() ) );
1350  const int N ( numeric_cast<int>( A.columns() ) );
1351  const int lda( numeric_cast<int>( A.spacing() ) );
1352 
1353  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1354  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1355  }
1357 #endif
1358  //**********************************************************************************************
1359 
1360  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1361 #if BLAZE_BLAS_MODE
1362 
1375  template< typename VT1 // Type of the left-hand side target vector
1376  , typename MT1 // Type of the left-hand side matrix operand
1377  , typename VT2 > // Type of the right-hand side vector operand
1378  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1379  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1380  {
1381  using boost::numeric_cast;
1382 
1386  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1387  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1388  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1389 
1390  const int M ( numeric_cast<int>( A.rows() ) );
1391  const int N ( numeric_cast<int>( A.columns() ) );
1392  const int lda( numeric_cast<int>( A.spacing() ) );
1393  const complex<float> alpha( -1.0F, 0.0F );
1394  const complex<float> beta ( 1.0F, 0.0F );
1395 
1396  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1397  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1398  }
1400 #endif
1401  //**********************************************************************************************
1402 
1403  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1404 #if BLAZE_BLAS_MODE
1405 
1418  template< typename VT1 // Type of the left-hand side target vector
1419  , typename MT1 // Type of the left-hand side matrix operand
1420  , typename VT2 > // Type of the right-hand side vector operand
1421  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1422  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1423  {
1424  using boost::numeric_cast;
1425 
1429  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1430  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1431  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1432 
1433  const int M ( numeric_cast<int>( A.rows() ) );
1434  const int N ( numeric_cast<int>( A.columns() ) );
1435  const int lda( numeric_cast<int>( A.spacing() ) );
1436  const complex<double> alpha( -1.0, 0.0 );
1437  const complex<double> beta ( 1.0, 0.0 );
1438 
1439  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1440  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1441  }
1443 #endif
1444  //**********************************************************************************************
1445 
1446  //**Subtraction assignment to sparse vectors****************************************************
1447  // No special implementation for the subtraction assignment to sparse vectors.
1448  //**********************************************************************************************
1449 
1450  //**Multiplication assignment to dense vectors**************************************************
1463  template< typename VT1 > // Type of the target dense vector
1464  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1465  {
1467 
1471 
1472  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1473 
1474  const ResultType tmp( rhs );
1475  multAssign( ~lhs, tmp );
1476  }
1478  //**********************************************************************************************
1479 
1480  //**Multiplication assignment to sparse vectors*************************************************
1481  // No special implementation for the multiplication assignment to sparse vectors.
1482  //**********************************************************************************************
1483 
1484  //**Compile time checks*************************************************************************
1491  //**********************************************************************************************
1492 };
1493 //*************************************************************************************************
1494 
1495 
1496 
1497 
1498 //=================================================================================================
1499 //
1500 // DVECSCALARMULTEXPR SPECIALIZATION
1501 //
1502 //=================================================================================================
1503 
1504 //*************************************************************************************************
1512 template< typename MT // Type of the left-hand side dense matrix
1513  , typename VT // Type of the right-hand side dense vector
1514  , typename ST > // Type of the scalar value
1515 class DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >
1516  : public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1517  , private VecScalarMultExpr
1518  , private Computation
1519 {
1520  private:
1521  //**Type definitions****************************************************************************
// Shorthand names for the wrapped multiplication expression and for the
// result, element and composite types of its matrix (MT) and vector (VT)
// operands.
1522  typedef DMatDVecMultExpr<MT,VT> MVM;
1523  typedef typename MVM::ResultType RES;
1524  typedef typename MT::ResultType MRT;
1525  typedef typename VT::ResultType VRT;
1526  typedef typename MRT::ElementType MET;
1527  typedef typename VRT::ElementType VET;
1528  typedef typename MT::CompositeType MCT;
1529  typedef typename VT::CompositeType VCT;
1530  //**********************************************************************************************
1531 
1532  //**********************************************************************************************
// Compile-time flag: non-zero when the matrix operand is a computation, is not
// vectorizable, and matrix and vector share one BLAS-compatible element type.
// It is the selector for the LT typedef and is consulted by the kernel
// dispatch in the assignment functions.
1534  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
1535  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1536  //**********************************************************************************************
1537 
1538  //**********************************************************************************************
// Compile-time flag: non-zero when the vector operand is a computation.
// It is the selector for the RT typedef.
1540  enum { evaluateVector = IsComputation<VT>::value };
1541  //**********************************************************************************************
1542 
1543  //**********************************************************************************************
1545 
1548  template< typename T1, typename T2, typename T3, typename T4 >
1549  struct UseSinglePrecisionKernel {
1550  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1551  IsFloat<typename T1::ElementType>::value &&
1552  IsFloat<typename T2::ElementType>::value &&
1553  IsFloat<typename T3::ElementType>::value &&
1554  !IsComplex<T4>::value };
1555  };
1556  //**********************************************************************************************
1557 
1558  //**********************************************************************************************
1560 
1563  template< typename T1, typename T2, typename T3, typename T4 >
1564  struct UseDoublePrecisionKernel {
1565  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1566  IsDouble<typename T1::ElementType>::value &&
1567  IsDouble<typename T2::ElementType>::value &&
1568  IsDouble<typename T3::ElementType>::value &&
1569  !IsComplex<T4>::value };
1570  };
1571  //**********************************************************************************************
1572 
1573  //**********************************************************************************************
1575 
1578  template< typename T1, typename T2, typename T3 >
1579  struct UseSinglePrecisionComplexKernel {
1580  typedef complex<float> Type;
1581  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1582  IsSame<typename T1::ElementType,Type>::value &&
1583  IsSame<typename T2::ElementType,Type>::value &&
1584  IsSame<typename T3::ElementType,Type>::value };
1585  };
1586  //**********************************************************************************************
1587 
1588  //**********************************************************************************************
1590 
1593  template< typename T1, typename T2, typename T3 >
1594  struct UseDoublePrecisionComplexKernel {
1595  typedef complex<double> Type;
1596  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1597  IsSame<typename T1::ElementType,Type>::value &&
1598  IsSame<typename T2::ElementType,Type>::value &&
1599  IsSame<typename T3::ElementType,Type>::value };
1600  };
1601  //**********************************************************************************************
1602 
1603  //**********************************************************************************************
1605 
1607  template< typename T1, typename T2, typename T3, typename T4 >
1608  struct UseDefaultKernel {
1609  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1610  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1611  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1612  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1613  };
1614  //**********************************************************************************************
1615 
1616  //**********************************************************************************************
1618 
1621  template< typename T1, typename T2, typename T3, typename T4 >
1622  struct UseVectorizedDefaultKernel {
1623  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1624  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1625  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1626  IsSame<typename T1::ElementType,T4>::value &&
1627  IntrinsicTrait<typename T1::ElementType>::addition &&
1628  IntrinsicTrait<typename T1::ElementType>::multiplication };
1629  };
1630  //**********************************************************************************************
1631 
1632  public:
1633  //**Type definitions****************************************************************************
// Public type definitions of the scaled matrix-vector multiplication
// expression: the expression type itself, its result/transpose/element types,
// the matching intrinsic type, and the return/composite types.
1634  typedef DVecScalarMultExpr<MVM,ST,false> This;
1635  typedef typename MultTrait<RES,ST>::Type ResultType;
1636  typedef typename ResultType::TransposeType TransposeType;
1637  typedef typename ResultType::ElementType ElementType;
1638  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
1639  typedef const ElementType ReturnType;
1640  typedef const ResultType CompositeType;
1641 
// Type of the left-hand side operand (the wrapped matrix-vector product).
1643  typedef const DMatDVecMultExpr<MT,VT> LeftOperand;
1644 
// Type of the right-hand side operand (the scalar).
1646  typedef ST RightOperand;
1647 
// Type used by the assignment functions to hold the matrix operand.
// NOTE(review): presumably SelectType yields 'const MRT' when evaluateMatrix
// is non-zero and MCT otherwise - confirm against SelectType.
1649  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
1650 
// Type used by the assignment functions to hold the vector operand.
// NOTE(review): presumably 'const VRT' when evaluateVector is non-zero and VCT
// otherwise - confirm against SelectType.
1652  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
1653  //**********************************************************************************************
1654 
1655  //**Compilation flags***************************************************************************
// Vectorization flag of this expression; fixed to 0 here.
1657  enum { vectorizable = 0 };
1658 
// SMP assignment flag of this expression; fixed to 0 here.
1660  enum { smpAssignable = 0 };
1661  //**********************************************************************************************
1662 
1663  //**Constructor*********************************************************************************
1669  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1670  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1671  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1672  {}
1673  //**********************************************************************************************
1674 
1675  //**Subscript operator**************************************************************************
1681  inline ReturnType operator[]( size_t index ) const {
1682  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1683  return vector_[index] * scalar_;
1684  }
1685  //**********************************************************************************************
1686 
1687  //**Size function*******************************************************************************
1692  inline size_t size() const {
1693  return vector_.size();
1694  }
1695  //**********************************************************************************************
1696 
1697  //**Left operand access*************************************************************************
1702  inline LeftOperand leftOperand() const {
1703  return vector_;
1704  }
1705  //**********************************************************************************************
1706 
1707  //**Right operand access************************************************************************
1712  inline RightOperand rightOperand() const {
1713  return scalar_;
1714  }
1715  //**********************************************************************************************
1716 
1717  //**********************************************************************************************
1723  template< typename T >
1724  inline bool canAlias( const T* alias ) const {
1725  return vector_.canAlias( alias );
1726  }
1727  //**********************************************************************************************
1728 
1729  //**********************************************************************************************
1735  template< typename T >
1736  inline bool isAliased( const T* alias ) const {
1737  return vector_.isAliased( alias );
1738  }
1739  //**********************************************************************************************
1740 
1741  private:
1742  //**Member variables****************************************************************************
// Left-hand side operand: the dense matrix-dense vector product being scaled.
1743  LeftOperand vector_;
// Right-hand side operand: the scalar every element is multiplied by.
1744  RightOperand scalar_;
1745  //**********************************************************************************************
1746 
1747  //**Assignment to dense vectors*****************************************************************
1759  template< typename VT1 > // Type of the target dense vector
1760  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1761  {
1763 
1764  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1765 
1766  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1767  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1768 
1769  if( left.rows() == 0UL ) {
1770  return;
1771  }
1772  else if( left.columns() == 0UL ) {
1773  reset( ~lhs );
1774  return;
1775  }
1776 
1777  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1778  RT x( right ); // Evaluation of the right-hand side dense vector operand
1779 
1780  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1781  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1782  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1783  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1784 
1785  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1786  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1787  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1788  else
1789  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
1790  }
1791  //**********************************************************************************************
1792 
1793  //**Default assignment to dense vectors*********************************************************
1807  template< typename VT1 // Type of the left-hand side target vector
1808  , typename MT1 // Type of the left-hand side matrix operand
1809  , typename VT2 // Type of the right-hand side vector operand
1810  , typename ST2 > // Type of the scalar value
1811  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1812  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1813  {
1814  y.assign( A * x * scalar );
1815  }
1816  //**********************************************************************************************
1817 
1818  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for y = A * x * scalar.
// This overload participates only when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is true (EnableIf). Rows of A
// are processed in groups of 8, then 4, 3, 2 and finally a single remaining
// row. For every row an intrinsic accumulator collects A.load(i,j) * x.load(j)
// over the columns, is reduced with sum(), scaled by 'scalar' and stored in
// the matching element of y.
//
// NOTE(review): the accumulators are default-constructed and never cleared
// explicitly; this presumably relies on IntrinsicType's default constructor
// zero-initializing the register - confirm.
// NOTE(review): j advances in steps of IT::size up to N, so the last load can
// reach past column N when N is not a multiple of IT::size; presumably the
// operands' storage is padded accordingly - confirm.
1832  template< typename VT1 // Type of the left-hand side target vector
1833  , typename MT1 // Type of the left-hand side matrix operand
1834  , typename VT2 // Type of the right-hand side vector operand
1835  , typename ST2 > // Type of the scalar value
1836  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1837  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1838  {
1839  typedef IntrinsicTrait<ElementType> IT;
1840 
1841  const size_t M( A.rows() );
1842  const size_t N( A.columns() );
1843 
1844  size_t i( 0UL );
1845 
// Blocks of eight rows: each x.load(j) is shared by eight row accumulators.
1846  for( ; (i+8UL) <= M; i+=8UL ) {
1847  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1848  for( size_t j=0UL; j<N; j+=IT::size ) {
1849  const IntrinsicType x1( x.load(j) );
1850  xmm1 = xmm1 + A.load(i ,j) * x1;
1851  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1852  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1853  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1854  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1855  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1856  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1857  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
1858  }
1859  y[i ] = sum( xmm1 ) * scalar;
1860  y[i+1UL] = sum( xmm2 ) * scalar;
1861  y[i+2UL] = sum( xmm3 ) * scalar;
1862  y[i+3UL] = sum( xmm4 ) * scalar;
1863  y[i+4UL] = sum( xmm5 ) * scalar;
1864  y[i+5UL] = sum( xmm6 ) * scalar;
1865  y[i+6UL] = sum( xmm7 ) * scalar;
1866  y[i+7UL] = sum( xmm8 ) * scalar;
1867  }
// Blocks of four rows (at most one iteration after the eight-row loop).
1868  for( ; (i+4UL) <= M; i+=4UL ) {
1869  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1870  for( size_t j=0UL; j<N; j+=IT::size ) {
1871  const IntrinsicType x1( x.load(j) );
1872  xmm1 = xmm1 + A.load(i ,j) * x1;
1873  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1874  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1875  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1876  }
1877  y[i ] = sum( xmm1 ) * scalar;
1878  y[i+1UL] = sum( xmm2 ) * scalar;
1879  y[i+2UL] = sum( xmm3 ) * scalar;
1880  y[i+3UL] = sum( xmm4 ) * scalar;
1881  }
// Block of three rows.
1882  for( ; (i+3UL) <= M; i+=3UL ) {
1883  IntrinsicType xmm1, xmm2, xmm3;
1884  for( size_t j=0UL; j<N; j+=IT::size ) {
1885  const IntrinsicType x1( x.load(j) );
1886  xmm1 = xmm1 + A.load(i ,j) * x1;
1887  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1888  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1889  }
1890  y[i ] = sum( xmm1 ) * scalar;
1891  y[i+1UL] = sum( xmm2 ) * scalar;
1892  y[i+2UL] = sum( xmm3 ) * scalar;
1893  }
// Block of two rows.
1894  for( ; (i+2UL) <= M; i+=2UL ) {
1895  IntrinsicType xmm1, xmm2;
1896  for( size_t j=0UL; j<N; j+=IT::size ) {
1897  const IntrinsicType x1( x.load(j) );
1898  xmm1 = xmm1 + A.load(i ,j) * x1;
1899  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1900  }
1901  y[i ] = sum( xmm1 ) * scalar;
1902  y[i+1UL] = sum( xmm2 ) * scalar;
1903  }
// Single remaining row, if any.
1904  if( i < M ) {
1905  IntrinsicType xmm1;
1906  for( size_t j=0UL; j<N; j+=IT::size ) {
1907  xmm1 = xmm1 + A.load(i,j) * x.load(j);
1908  }
1909  y[i] = sum( xmm1 ) * scalar;
1910  }
1911  }
1912  //**********************************************************************************************
1913 
1914  //**BLAS-based assignment to dense vectors (default)********************************************
1928  template< typename VT1 // Type of the left-hand side target vector
1929  , typename MT1 // Type of the left-hand side matrix operand
1930  , typename VT2 // Type of the right-hand side vector operand
1931  , typename ST2 > // Type of the scalar value
1932  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1933  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1934  {
1935  selectDefaultAssignKernel( y, A, x, scalar );
1936  }
1937  //**********************************************************************************************
1938 
1939  //**BLAS-based assignment to dense vectors (single precision)***********************************
1940 #if BLAZE_BLAS_MODE
1941 
1954  template< typename VT1 // Type of the left-hand side target vector
1955  , typename MT1 // Type of the left-hand side matrix operand
1956  , typename VT2 // Type of the right-hand side vector operand
1957  , typename ST2 > // Type of the scalar value
1958  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1959  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1960  {
1961  using boost::numeric_cast;
1962 
1966 
1967  const int M ( numeric_cast<int>( A.rows() ) );
1968  const int N ( numeric_cast<int>( A.columns() ) );
1969  const int lda( numeric_cast<int>( A.spacing() ) );
1970 
1971  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1972  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1973  }
1974 #endif
1975  //**********************************************************************************************
1976 
1977  //**BLAS-based assignment to dense vectors (double precision)***********************************
1978 #if BLAZE_BLAS_MODE
1979 
1992  template< typename VT1 // Type of the left-hand side target vector
1993  , typename MT1 // Type of the left-hand side matrix operand
1994  , typename VT2 // Type of the right-hand side vector operand
1995  , typename ST2 > // Type of the scalar value
1996  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1997  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
1998  {
1999  using boost::numeric_cast;
2000 
2004 
2005  const int M ( numeric_cast<int>( A.rows() ) );
2006  const int N ( numeric_cast<int>( A.columns() ) );
2007  const int lda( numeric_cast<int>( A.spacing() ) );
2008 
2009  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2010  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2011  }
2012 #endif
2013  //**********************************************************************************************
2014 
2015  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2016 #if BLAZE_BLAS_MODE
2017 
2030  template< typename VT1 // Type of the left-hand side target vector
2031  , typename MT1 // Type of the left-hand side matrix operand
2032  , typename VT2 // Type of the right-hand side vector operand
2033  , typename ST2 > // Type of the scalar value
2034  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2035  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2036  {
2037  using boost::numeric_cast;
2038 
2042  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2043  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2044  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2045 
2046  const int M ( numeric_cast<int>( A.rows() ) );
2047  const int N ( numeric_cast<int>( A.columns() ) );
2048  const int lda( numeric_cast<int>( A.spacing() ) );
2049  const complex<float> alpha( scalar );
2050  const complex<float> beta ( 0.0F, 0.0F );
2051 
2052  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2053  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2054  }
2055 #endif
2056  //**********************************************************************************************
2057 
2058  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2059 #if BLAZE_BLAS_MODE
2060 
2073  template< typename VT1 // Type of the left-hand side target vector
2074  , typename MT1 // Type of the left-hand side matrix operand
2075  , typename VT2 // Type of the right-hand side vector operand
2076  , typename ST2 > // Type of the scalar value
2077  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2078  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2079  {
2080  using boost::numeric_cast;
2081 
2085  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2086  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2087  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2088 
2089  const int M ( numeric_cast<int>( A.rows() ) );
2090  const int N ( numeric_cast<int>( A.columns() ) );
2091  const int lda( numeric_cast<int>( A.spacing() ) );
2092  const complex<double> alpha( scalar );
2093  const complex<double> beta ( 0.0, 0.0 );
2094 
2095  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2096  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2097  }
2098 #endif
2099  //**********************************************************************************************
2100 
2101  //**Assignment to sparse vectors****************************************************************
2113  template< typename VT1 > // Type of the target sparse vector
2114  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2115  {
2117 
2121 
2122  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2123 
2124  const ResultType tmp( rhs );
2125  assign( ~lhs, tmp );
2126  }
2127  //**********************************************************************************************
2128 
2129  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector product expression 'rhs' to the
// dense vector 'lhs'.
//  - The matrix and vector operands are taken from rhs.vector_ (left/right operand).
//  - If the matrix operand has zero rows or zero columns the function returns
//    without modifying 'lhs'.
//  - Otherwise both operands are bound/evaluated as A (type LT) and x (type RT) and
//    the work is dispatched to one of two kernels, passing rhs.scalar_ as the scalar:
//      * selectDefaultAddAssignKernel when ( IsComputation<MT>::value && !evaluateMatrix )
//        or when A.rows() * A.columns() is below DMATDVECMULT_THRESHOLD,
//      * selectBlasAddAssignKernel in all other cases.
// Precondition (checked by BLAZE_INTERNAL_ASSERT): (~lhs).size() == rhs.size().
2141  template< typename VT1 > // Type of the target dense vector
2142  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2143  {
2145 
2146  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2147 
2148  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2149  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2150 
// Empty matrix operand: the product contributes nothing, so 'lhs' is left as is.
2151  if( left.rows() == 0UL || left.columns() == 0UL ) {
2152  return;
2153  }
2154 
2155  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2156  RT x( right ); // Evaluation of the right-hand side dense vector operand
2157 
2158  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2159  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2160  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2161  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2162 
// Kernel selection: default kernel for non-evaluated computation operands and for
// products below the size threshold, BLAS-dispatching kernel otherwise.
2163  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2164  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2165  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2166  else
2167  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2168  }
2169  //**********************************************************************************************
2170 
2171  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel.
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// does not hold (DisableIf). It forwards the expression A * x * scalar to
// y.addAssign(), i.e. the product is added to y by the target vector itself.
//   y      : target vector
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product
2185  template< typename VT1 // Type of the left-hand side target vector
2186  , typename MT1 // Type of the left-hand side matrix operand
2187  , typename VT2 // Type of the right-hand side vector operand
2188  , typename ST2 > // Type of the scalar value
2189  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2190  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2191  {
2192  y.addAssign( A * x * scalar );
2193  }
2194  //**********************************************************************************************
2195 
2196  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel: y[i] += ( row i of A . x ) * scalar.
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf).
//
// Rows of A are processed in groups of 8, then 4, then 3, then 2, and finally a
// single remaining row. Within a group, one IntrinsicType accumulator is kept per
// row; the column loop advances in steps of IT::size, loads one packed chunk of x
// per step and reuses it for every row of the group. After the column loop each
// accumulator is reduced with sum(), multiplied by 'scalar' and added to y.
//
// NOTE(review): the accumulators are default-constructed and then read
// (xmm = xmm + ...); this assumes IntrinsicType default-initializes to zero - confirm.
// NOTE(review): the column loop runs while j<N with stride IT::size, so the last
// load can reach past column N when N is not a multiple of IT::size; presumably
// the operands' storage is padded accordingly - confirm.
2210  template< typename VT1 // Type of the left-hand side target vector
2211  , typename MT1 // Type of the left-hand side matrix operand
2212  , typename VT2 // Type of the right-hand side vector operand
2213  , typename ST2 > // Type of the scalar value
2214  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2215  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2216  {
2217  typedef IntrinsicTrait<ElementType> IT;
2218 
2219  const size_t M( A.rows() );
2220  const size_t N( A.columns() );
2221 
// Row index shared by all of the following loops; each loop continues where the
// previous one stopped.
2222  size_t i( 0UL );
2223 
// Groups of eight rows.
2224  for( ; (i+8UL) <= M; i+=8UL ) {
2225  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2226  for( size_t j=0UL; j<N; j+=IT::size ) {
2227  const IntrinsicType x1( x.load(j) );
2228  xmm1 = xmm1 + A.load(i ,j) * x1;
2229  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2230  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2231  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2232  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2233  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2234  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2235  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
2236  }
2237  y[i ] += sum( xmm1 ) * scalar;
2238  y[i+1UL] += sum( xmm2 ) * scalar;
2239  y[i+2UL] += sum( xmm3 ) * scalar;
2240  y[i+3UL] += sum( xmm4 ) * scalar;
2241  y[i+4UL] += sum( xmm5 ) * scalar;
2242  y[i+5UL] += sum( xmm6 ) * scalar;
2243  y[i+6UL] += sum( xmm7 ) * scalar;
2244  y[i+7UL] += sum( xmm8 ) * scalar;
2245  }
// Groups of four rows.
2246  for( ; (i+4UL) <= M; i+=4UL ) {
2247  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2248  for( size_t j=0UL; j<N; j+=IT::size ) {
2249  const IntrinsicType x1( x.load(j) );
2250  xmm1 = xmm1 + A.load(i ,j) * x1;
2251  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2252  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2253  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2254  }
2255  y[i ] += sum( xmm1 ) * scalar;
2256  y[i+1UL] += sum( xmm2 ) * scalar;
2257  y[i+2UL] += sum( xmm3 ) * scalar;
2258  y[i+3UL] += sum( xmm4 ) * scalar;
2259  }
// Groups of three rows.
2260  for( ; (i+3UL) <= M; i+=3UL ) {
2261  IntrinsicType xmm1, xmm2, xmm3;
2262  for( size_t j=0UL; j<N; j+=IT::size ) {
2263  const IntrinsicType x1( x.load(j) );
2264  xmm1 = xmm1 + A.load(i ,j) * x1;
2265  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2266  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2267  }
2268  y[i ] += sum( xmm1 ) * scalar;
2269  y[i+1UL] += sum( xmm2 ) * scalar;
2270  y[i+2UL] += sum( xmm3 ) * scalar;
2271  }
// Groups of two rows.
2272  for( ; (i+2UL) <= M; i+=2UL ) {
2273  IntrinsicType xmm1, xmm2;
2274  for( size_t j=0UL; j<N; j+=IT::size ) {
2275  const IntrinsicType x1( x.load(j) );
2276  xmm1 = xmm1 + A.load(i ,j) * x1;
2277  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2278  }
2279  y[i ] += sum( xmm1 ) * scalar;
2280  y[i+1UL] += sum( xmm2 ) * scalar;
2281  }
// At most one row is left at this point.
2282  if( i < M ) {
2283  IntrinsicType xmm1;
2284  for( size_t j=0UL; j<N; j+=IT::size ) {
2285  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2286  }
2287  y[i] += sum( xmm1 ) * scalar;
2288  }
2289  }
2290  //**********************************************************************************************
2291 
2292  //**BLAS-based addition assignment to dense vectors (default)***********************************
// BLAS-dispatch fallback for the addition assignment.
// This overload participates only when UseDefaultKernel<VT1,MT1,VT2,ST2> holds
// (EnableIf); it performs no BLAS call and simply forwards all arguments to
// selectDefaultAddAssignKernel().
2306  template< typename VT1 // Type of the left-hand side target vector
2307  , typename MT1 // Type of the left-hand side matrix operand
2308  , typename VT2 // Type of the right-hand side vector operand
2309  , typename ST2 > // Type of the scalar value
2310  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2311  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2312  {
2313  selectDefaultAddAssignKernel( y, A, x, scalar );
2314  }
2315  //**********************************************************************************************
2316 
2317  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2318 #if BLAZE_BLAS_MODE
2319 
// BLAS-based addition assignment for single precision operands.
// This overload participates only when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It issues one cblas_sgemv call in row-major, non-transposed
// mode with alpha = scalar and beta = 1.0F, which under the BLAS GEMV contract
// computes y := scalar * A * x + y.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2332  template< typename VT1 // Type of the left-hand side target vector
2333  , typename MT1 // Type of the left-hand side matrix operand
2334  , typename VT2 // Type of the right-hand side vector operand
2335  , typename ST2 > // Type of the scalar value
2336  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2337  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2338  {
2339  using boost::numeric_cast;
2340 
2344 
2345  const int M ( numeric_cast<int>( A.rows() ) );
2346  const int N ( numeric_cast<int>( A.columns() ) );
2347  const int lda( numeric_cast<int>( A.spacing() ) );
2348 
// beta = 1.0F keeps the current content of y, so the product is accumulated.
2349  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2350  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2351  }
2352 #endif
2353  //**********************************************************************************************
2354 
2355  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2356 #if BLAZE_BLAS_MODE
2357 
// BLAS-based addition assignment for double precision operands.
// This overload participates only when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It issues one cblas_dgemv call in row-major, non-transposed
// mode with alpha = scalar and beta = 1.0, which under the BLAS GEMV contract
// computes y := scalar * A * x + y.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2370  template< typename VT1 // Type of the left-hand side target vector
2371  , typename MT1 // Type of the left-hand side matrix operand
2372  , typename VT2 // Type of the right-hand side vector operand
2373  , typename ST2 > // Type of the scalar value
2374  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2375  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2376  {
2377  using boost::numeric_cast;
2378 
2382 
2383  const int M ( numeric_cast<int>( A.rows() ) );
2384  const int N ( numeric_cast<int>( A.columns() ) );
2385  const int lda( numeric_cast<int>( A.spacing() ) );
2386 
// beta = 1.0 keeps the current content of y, so the product is accumulated.
2387  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2388  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2389  }
2390 #endif
2391  //**********************************************************************************************
2392 
2393  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2394 #if BLAZE_BLAS_MODE
2395 
// BLAS-based addition assignment for single precision complex operands.
// This overload participates only when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds (EnableIf); the compile time constraints additionally require the
// value_type of all three element types to be float.
// The scalar is converted to complex<float> (alpha) and beta is (1,0); both are
// passed by address to cblas_cgemv, which under the BLAS GEMV contract computes
// y := alpha * A * x + y in row-major, non-transposed mode.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2408  template< typename VT1 // Type of the left-hand side target vector
2409  , typename MT1 // Type of the left-hand side matrix operand
2410  , typename VT2 // Type of the right-hand side vector operand
2411  , typename ST2 > // Type of the scalar value
2412  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2413  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2414  {
2415  using boost::numeric_cast;
2416 
2420  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2421  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2422  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2423 
2424  const int M ( numeric_cast<int>( A.rows() ) );
2425  const int N ( numeric_cast<int>( A.columns() ) );
2426  const int lda( numeric_cast<int>( A.spacing() ) );
2427  const complex<float> alpha( scalar );
// beta = (1,0) keeps the current content of y, so the product is accumulated.
2428  const complex<float> beta ( 1.0F, 0.0F );
2429 
2430  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2431  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2432  }
2433 #endif
2434  //**********************************************************************************************
2435 
2436  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2437 #if BLAZE_BLAS_MODE
2438 
// BLAS-based addition assignment for double precision complex operands.
// This overload participates only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds (EnableIf); the compile time constraints additionally require the
// value_type of all three element types to be double.
// The scalar is converted to complex<double> (alpha) and beta is (1,0); both are
// passed by address to cblas_zgemv, which under the BLAS GEMV contract computes
// y := alpha * A * x + y in row-major, non-transposed mode.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2451  template< typename VT1 // Type of the left-hand side target vector
2452  , typename MT1 // Type of the left-hand side matrix operand
2453  , typename VT2 // Type of the right-hand side vector operand
2454  , typename ST2 > // Type of the scalar value
2455  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2456  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2457  {
2458  using boost::numeric_cast;
2459 
2463  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2464  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2465  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2466 
2467  const int M ( numeric_cast<int>( A.rows() ) );
2468  const int N ( numeric_cast<int>( A.columns() ) );
2469  const int lda( numeric_cast<int>( A.spacing() ) );
2470  const complex<double> alpha( scalar );
// beta = (1,0) keeps the current content of y, so the product is accumulated.
2471  const complex<double> beta ( 1.0, 0.0 );
2472 
2473  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2474  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2475  }
2476 #endif
2477  //**********************************************************************************************
2478 
2479  //**Addition assignment to sparse vectors*******************************************************
2480  // No special implementation for the addition assignment to sparse vectors.
2481  //**********************************************************************************************
2482 
2483  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector product expression 'rhs'
// to the dense vector 'lhs'.
//  - The matrix and vector operands are taken from rhs.vector_ (left/right operand).
//  - If the matrix operand has zero rows or zero columns the function returns
//    without modifying 'lhs'.
//  - Otherwise both operands are bound/evaluated as A (type LT) and x (type RT) and
//    the work is dispatched to one of two kernels, passing rhs.scalar_ as the scalar:
//      * selectDefaultSubAssignKernel when ( IsComputation<MT>::value && !evaluateMatrix )
//        or when A.rows() * A.columns() is below DMATDVECMULT_THRESHOLD,
//      * selectBlasSubAssignKernel in all other cases.
// Precondition (checked by BLAZE_INTERNAL_ASSERT): (~lhs).size() == rhs.size().
2495  template< typename VT1 > // Type of the target dense vector
2496  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2497  {
2499 
2500  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2501 
2502  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2503  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2504 
// Empty matrix operand: the product contributes nothing, so 'lhs' is left as is.
2505  if( left.rows() == 0UL || left.columns() == 0UL ) {
2506  return;
2507  }
2508 
2509  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2510  RT x( right ); // Evaluation of the right-hand side dense vector operand
2511 
2512  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2513  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2514  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2515  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2516 
// Kernel selection: default kernel for non-evaluated computation operands and for
// products below the size threshold, BLAS-dispatching kernel otherwise.
2517  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2518  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2519  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2520  else
2521  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2522  }
2523  //**********************************************************************************************
2524 
2525  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel.
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// does not hold (DisableIf). It forwards the expression A * x * scalar to
// y.subAssign(), i.e. the product is subtracted from y by the target vector itself.
//   y      : target vector
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product
2539  template< typename VT1 // Type of the left-hand side target vector
2540  , typename MT1 // Type of the left-hand side matrix operand
2541  , typename VT2 // Type of the right-hand side vector operand
2542  , typename ST2 > // Type of the scalar value
2543  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2544  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2545  {
2546  y.subAssign( A * x * scalar );
2547  }
2548  //**********************************************************************************************
2549 
2550  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel: y[i] -= ( row i of A . x ) * scalar.
// This overload participates only when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf).
//
// Rows of A are processed in groups of 8, then 4, then 3, then 2, and finally a
// single remaining row. Within a group, one IntrinsicType accumulator is kept per
// row; the column loop advances in steps of IT::size, loads one packed chunk of x
// per step and reuses it for every row of the group. After the column loop each
// accumulator is reduced with sum(), multiplied by 'scalar' and subtracted from y.
//
// NOTE(review): the accumulators are default-constructed and then read
// (xmm = xmm + ...); this assumes IntrinsicType default-initializes to zero - confirm.
// NOTE(review): the column loop runs while j<N with stride IT::size, so the last
// load can reach past column N when N is not a multiple of IT::size; presumably
// the operands' storage is padded accordingly - confirm.
2564  template< typename VT1 // Type of the left-hand side target vector
2565  , typename MT1 // Type of the left-hand side matrix operand
2566  , typename VT2 // Type of the right-hand side vector operand
2567  , typename ST2 > // Type of the scalar value
2568  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2569  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2570  {
2571  typedef IntrinsicTrait<ElementType> IT;
2572 
2573  const size_t M( A.rows() );
2574  const size_t N( A.columns() );
2575 
// Row index shared by all of the following loops; each loop continues where the
// previous one stopped.
2576  size_t i( 0UL );
2577 
// Groups of eight rows.
2578  for( ; (i+8UL) <= M; i+=8UL ) {
2579  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2580  for( size_t j=0UL; j<N; j+=IT::size ) {
2581  const IntrinsicType x1( x.load(j) );
2582  xmm1 = xmm1 + A.load(i ,j) * x1;
2583  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2584  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2585  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2586  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2587  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2588  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2589  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
2590  }
2591  y[i ] -= sum( xmm1 ) * scalar;
2592  y[i+1UL] -= sum( xmm2 ) * scalar;
2593  y[i+2UL] -= sum( xmm3 ) * scalar;
2594  y[i+3UL] -= sum( xmm4 ) * scalar;
2595  y[i+4UL] -= sum( xmm5 ) * scalar;
2596  y[i+5UL] -= sum( xmm6 ) * scalar;
2597  y[i+6UL] -= sum( xmm7 ) * scalar;
2598  y[i+7UL] -= sum( xmm8 ) * scalar;
2599  }
// Groups of four rows.
2600  for( ; (i+4UL) <= M; i+=4UL ) {
2601  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2602  for( size_t j=0UL; j<N; j+=IT::size ) {
2603  const IntrinsicType x1( x.load(j) );
2604  xmm1 = xmm1 + A.load(i ,j) * x1;
2605  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2606  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2607  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2608  }
2609  y[i ] -= sum( xmm1 ) * scalar;
2610  y[i+1UL] -= sum( xmm2 ) * scalar;
2611  y[i+2UL] -= sum( xmm3 ) * scalar;
2612  y[i+3UL] -= sum( xmm4 ) * scalar;
2613  }
// Groups of three rows.
2614  for( ; (i+3UL) <= M; i+=3UL ) {
2615  IntrinsicType xmm1, xmm2, xmm3;
2616  for( size_t j=0UL; j<N; j+=IT::size ) {
2617  const IntrinsicType x1( x.load(j) );
2618  xmm1 = xmm1 + A.load(i ,j) * x1;
2619  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2620  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2621  }
2622  y[i ] -= sum( xmm1 ) * scalar;
2623  y[i+1UL] -= sum( xmm2 ) * scalar;
2624  y[i+2UL] -= sum( xmm3 ) * scalar;
2625  }
// Groups of two rows.
2626  for( ; (i+2UL) <= M; i+=2UL ) {
2627  IntrinsicType xmm1, xmm2;
2628  for( size_t j=0UL; j<N; j+=IT::size ) {
2629  const IntrinsicType x1( x.load(j) );
2630  xmm1 = xmm1 + A.load(i ,j) * x1;
2631  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2632  }
2633  y[i ] -= sum( xmm1 ) * scalar;
2634  y[i+1UL] -= sum( xmm2 ) * scalar;
2635  }
// At most one row is left at this point.
2636  if( i < M ) {
2637  IntrinsicType xmm1;
2638  for( size_t j=0UL; j<N; j+=IT::size ) {
2639  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2640  }
2641  y[i] -= sum( xmm1 ) * scalar;
2642  }
2643  }
2644  //**********************************************************************************************
2645 
2646  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// BLAS-dispatch fallback for the subtraction assignment.
// This overload participates only when UseDefaultKernel<VT1,MT1,VT2,ST2> holds
// (EnableIf); it performs no BLAS call and simply forwards all arguments to
// selectDefaultSubAssignKernel().
2660  template< typename VT1 // Type of the left-hand side target vector
2661  , typename MT1 // Type of the left-hand side matrix operand
2662  , typename VT2 // Type of the right-hand side vector operand
2663  , typename ST2 > // Type of the scalar value
2664  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2665  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2666  {
2667  selectDefaultSubAssignKernel( y, A, x, scalar );
2668  }
2669  //**********************************************************************************************
2670 
2671  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2672 #if BLAZE_BLAS_MODE
2673 
// BLAS-based subtraction assignment for single precision operands.
// This overload participates only when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It issues one cblas_sgemv call in row-major, non-transposed
// mode with alpha = -scalar and beta = 1.0F, which under the BLAS GEMV contract
// computes y := y - scalar * A * x.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2686  template< typename VT1 // Type of the left-hand side target vector
2687  , typename MT1 // Type of the left-hand side matrix operand
2688  , typename VT2 // Type of the right-hand side vector operand
2689  , typename ST2 > // Type of the scalar value
2690  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2691  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2692  {
2693  using boost::numeric_cast;
2694 
2698 
2699  const int M ( numeric_cast<int>( A.rows() ) );
2700  const int N ( numeric_cast<int>( A.columns() ) );
2701  const int lda( numeric_cast<int>( A.spacing() ) );
2702 
// The subtraction is expressed through the negated scaling factor; beta = 1.0F
// keeps the current content of y.
2703  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2704  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2705  }
2706 #endif
2707  //**********************************************************************************************
2708 
2709  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2710 #if BLAZE_BLAS_MODE
2711 
// BLAS-based subtraction assignment for double precision operands.
// This overload participates only when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf). It issues one cblas_dgemv call in row-major, non-transposed
// mode with alpha = -scalar and beta = 1.0, which under the BLAS GEMV contract
// computes y := y - scalar * A * x.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2724  template< typename VT1 // Type of the left-hand side target vector
2725  , typename MT1 // Type of the left-hand side matrix operand
2726  , typename VT2 // Type of the right-hand side vector operand
2727  , typename ST2 > // Type of the scalar value
2728  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2729  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2730  {
2731  using boost::numeric_cast;
2732 
2736 
2737  const int M ( numeric_cast<int>( A.rows() ) );
2738  const int N ( numeric_cast<int>( A.columns() ) );
2739  const int lda( numeric_cast<int>( A.spacing() ) );
2740 
// The subtraction is expressed through the negated scaling factor; beta = 1.0
// keeps the current content of y.
2741  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2742  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2743  }
2744 #endif
2745  //**********************************************************************************************
2746 
2747  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2748 #if BLAZE_BLAS_MODE
2749 
// BLAS-based subtraction assignment for single precision complex operands.
// This overload participates only when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds (EnableIf); the compile time constraints additionally require the
// value_type of all three element types to be float.
// alpha is the negated scalar converted to complex<float> and beta is (1,0); both
// are passed by address to cblas_cgemv, which under the BLAS GEMV contract computes
// y := y - scalar * A * x in row-major, non-transposed mode.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2762  template< typename VT1 // Type of the left-hand side target vector
2763  , typename MT1 // Type of the left-hand side matrix operand
2764  , typename VT2 // Type of the right-hand side vector operand
2765  , typename ST2 > // Type of the scalar value
2766  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2767  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2768  {
2769  using boost::numeric_cast;
2770 
2774  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2775  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2776  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2777 
2778  const int M ( numeric_cast<int>( A.rows() ) );
2779  const int N ( numeric_cast<int>( A.columns() ) );
2780  const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scaling factor turns the accumulating GEMV into a subtraction.
2781  const complex<float> alpha( -scalar );
2782  const complex<float> beta ( 1.0F, 0.0F );
2783 
2784  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2785  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2786  }
2787 #endif
2788  //**********************************************************************************************
2789 
2790  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2791 #if BLAZE_BLAS_MODE
2792 
// BLAS-based subtraction assignment for double precision complex operands.
// This overload participates only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds (EnableIf); the compile time constraints additionally require the
// value_type of all three element types to be double.
// alpha is the negated scalar converted to complex<double> and beta is (1,0); both
// are passed by address to cblas_zgemv, which under the BLAS GEMV contract computes
// y := y - scalar * A * x in row-major, non-transposed mode.
// The matrix dimensions and the row spacing (used as leading dimension) are
// converted from the operand's size type to int via boost::numeric_cast.
2805  template< typename VT1 // Type of the left-hand side target vector
2806  , typename MT1 // Type of the left-hand side matrix operand
2807  , typename VT2 // Type of the right-hand side vector operand
2808  , typename ST2 > // Type of the scalar value
2809  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2810  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2811  {
2812  using boost::numeric_cast;
2813 
2817  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2818  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2819  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2820 
2821  const int M ( numeric_cast<int>( A.rows() ) );
2822  const int N ( numeric_cast<int>( A.columns() ) );
2823  const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scaling factor turns the accumulating GEMV into a subtraction.
2824  const complex<double> alpha( -scalar );
2825  const complex<double> beta ( 1.0, 0.0 );
2826 
2827  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2828  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2829  }
2830 #endif
2831  //**********************************************************************************************
2832 
2833  //**Subtraction assignment to sparse vectors****************************************************
2834  // No special implementation for the subtraction assignment to sparse vectors.
2835  //**********************************************************************************************
2836 
2837  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled matrix/vector product expression 'rhs'
// to the dense vector 'lhs'. No dedicated kernel is used: the expression is first
// evaluated into a temporary of type ResultType, and that temporary is then passed
// to the multAssign() overload for (~lhs, tmp).
// Precondition (checked by BLAZE_INTERNAL_ASSERT): (~lhs).size() == rhs.size().
2849  template< typename VT1 > // Type of the target dense vector
2850  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2851  {
2853 
2857 
2858  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2859 
// Evaluate the whole expression once, then multiply the target with the result.
2860  const ResultType tmp( rhs );
2861  multAssign( ~lhs, tmp );
2862  }
2863  //**********************************************************************************************
2864 
2865  //**Multiplication assignment to sparse vectors*************************************************
2866  // No special implementation for the multiplication assignment to sparse vectors.
2867  //**********************************************************************************************
2868 
2869  //**Compile time checks*************************************************************************
2878  //**********************************************************************************************
2879 };
2881 //*************************************************************************************************
2882 
2883 
2884 
2885 
2886 //=================================================================================================
2887 //
2888 // GLOBAL BINARY ARITHMETIC OPERATORS
2889 //
2890 //=================================================================================================
2891 
2892 //*************************************************************************************************
// Creates the expression object for a dense matrix/dense vector multiplication.
// The overload is disabled (DisableIf) when T1 is itself a matrix/matrix
// multiplication expression (IsMatMatMultExpr<T1>).
// Throws std::invalid_argument if the number of matrix columns differs from the
// vector size; otherwise returns DMatDVecMultExpr<T1,T2>( ~mat, ~vec ).
// NOTE(review): the declarator line (function name and parameter list, listing
// line 2925) is missing from this listing - presumably operator*( mat, vec );
// confirm against the original header.
2922 template< typename T1 // Type of the left-hand side dense matrix
2923  , typename T2 > // Type of the right-hand side dense vector
2924 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2926 {
2928 
// Dimension check: the product is only defined for matching inner dimensions.
2929  if( (~mat).columns() != (~vec).size() )
2930  throw std::invalid_argument( "Matrix and vector sizes do not match" );
2931 
2932  return DMatDVecMultExpr<T1,T2>( ~mat, ~vec );
2933 }
2934 //*************************************************************************************************
2935 
2936 
2937 
2938 
2939 //=================================================================================================
2940 //
2941 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
2942 //
2943 //=================================================================================================
2944 
2945 //*************************************************************************************************
// Restructuring overload, enabled (EnableIf) only when T1 is a matrix/matrix
// multiplication expression (IsMatMatMultExpr<T1>).
// Instead of multiplying the matrix product with the vector, the expression is
// re-associated: the right-hand matrix operand is multiplied with the vector
// first, and the left-hand matrix operand is then multiplied with that result,
// i.e. (A*B)*v is rewritten as A*(B*v).
// NOTE(review): the declarator line (function name and parameter list, listing
// line 2962) is missing from this listing - presumably operator*( mat, vec );
// confirm against the original header.
2958 template< typename T1 // Type of the left-hand side dense matrix
2959  , bool SO // Storage order of the left-hand side dense matrix
2960  , typename T2 > // Type of the right-hand side dense vector
2961 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2963 {
2965 
2966  return (~mat).leftOperand() * ( (~mat).rightOperand() * vec );
2967 }
2968 //*************************************************************************************************
2969 
2970 
2971 
2972 
2973 //=================================================================================================
2974 //
2975 // EXPRESSION TRAIT SPECIALIZATIONS
2976 //
2977 //=================================================================================================
2978 
2979 //*************************************************************************************************
// Specialization of SubvectorExprTrait for dense matrix/dense vector
// multiplication expressions. The nested 'Type' is defined as the result of
// MultExprTrait applied to the submatrix expression type of 'const MT'
// (SubmatrixExprTrait<const MT>::Type) and the unchanged vector type VT.
2981 template< typename MT, typename VT >
2982 struct SubvectorExprTrait< DMatDVecMultExpr<MT,VT> >
2983 {
2984  public:
2985  //**********************************************************************************************
2986  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT>::Type, VT >::Type Type;
2987  //**********************************************************************************************
2988 };
2990 //*************************************************************************************************
2991 
2992 } // namespace blaze
2993 
2994 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:240
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:325
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:3703
DMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the DMatDVecMultExpr class.
Definition: DMatDVecMultExpr.h:263
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
Expression object for dense matrix-dense vector multiplications. The DMatDVecMultExpr class represents...
Definition: DMatDVecMultExpr.h:103
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:109
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDVecMultExpr.h:229
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
DMatDVecMultExpr< MT, VT > This
Type of this DMatDVecMultExpr instance.
Definition: DMatDVecMultExpr.h:228
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
size_t size() const
Returns the current size/dimension of the vector.
Definition: DMatDVecMultExpr.h:305
const size_t end_
End of the unrolled calculation loop.
Definition: DMatDVecMultExpr.h:358
Constraint on the data type.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:114
Constraint on the data type.
Header file for the MultExprTrait class template.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:111
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:250
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:246
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:113
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:315
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
Base class for all matrix/vector multiplication expression templates. The MatVecMultExpr class serves ...
Definition: MatVecMultExpr.h:66
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:243
Constraints on the storage order of matrix types.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDVecMultExpr.h:231
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDVecMultExpr.h:356
Header file for the EnableIf class template.
Header file for the IsNumeric type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: DMatDVecMultExpr.h:357
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDVecMultExpr.h:337
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:110
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDVecMultExpr.h:234
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:237
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDVecMultExpr.h:349
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDVecMultExpr.h:230
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDVecMultExpr.h:232
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDVecMultExpr.h:233
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: DMatDVecMultExpr.h:278
Header file for the MatVecMultExpr base class.
const size_t DMATDVECMULT_THRESHOLD
Row-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:51
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:112
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.