All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
67 #include <blaze/system/BLAS.h>
69 #include <blaze/util/Assert.h>
70 #include <blaze/util/Complex.h>
76 #include <blaze/util/DisableIf.h>
77 #include <blaze/util/EnableIf.h>
79 #include <blaze/util/SelectType.h>
80 #include <blaze/util/Types.h>
86 
87 
88 namespace blaze {
89 
90 //=================================================================================================
91 //
92 // CLASS DMATDVECMULTEXPR
93 //
94 //=================================================================================================
95 
96 //*************************************************************************************************
103 template< typename MT // Type of the left-hand side dense matrix
104  , typename VT > // Type of the right-hand side dense vector
105 class DMatDVecMultExpr : public DenseVector< DMatDVecMultExpr<MT,VT>, false >
106  , private MatVecMultExpr
107  , private Computation
108 {
109  private:
110  //**Type definitions****************************************************************************
// Private shorthands for the operand-related types used throughout the class.
111  typedef typename MT::ResultType MRT;  // ResultType of the matrix operand type MT
112  typedef typename VT::ResultType VRT;  // ResultType of the vector operand type VT
113  typedef typename MRT::ElementType MET;  // ElementType of MRT
114  typedef typename VRT::ElementType VET;  // ElementType of VRT
115  typedef typename MT::CompositeType MCT;  // CompositeType of MT
116  typedef typename VT::CompositeType VCT;  // CompositeType of VT
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
// Compile-time flag consulted by UseSMPAssignKernel, smpAssignable, canSMPAssign() and the
// kernel selection functions.
// NOTE(review): the initializer is cut off in this listing (the continuation line is missing).
// Only the first two conditions are visible: MT is a computation and MET and VET are the same
// type. Restore the remainder from the upstream header before relying on this definition.
121  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
123  //**********************************************************************************************
124 
125  //**********************************************************************************************
// Compile-time flag: nonzero when VT is a computation or when RequiresEvaluation<VT> is set.
// It feeds UseSMPAssignKernel and smpAssignable.
127  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
132 
// Predicate used with EnableIf/DisableIf to choose between the two overloads of
// selectAssignKernel() and selectAddAssignKernel(). Its value is
// evaluateMatrix || evaluateVector; in the visible code it does not depend on the template
// arguments T1, T2 and T3.
135  template< typename T1, typename T2, typename T3 >
136  struct UseSMPAssignKernel {
137  enum { value = evaluateMatrix || evaluateVector };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
// Predicate that enables the cblas_sgemv-based overloads of selectBlasAssignKernel() and
// selectBlasAddAssignKernel(); its negation is also part of UseDefaultKernel.
// NOTE(review): the initializer is truncated in this listing (source lines 151-153 are missing).
// Only the vectorizable checks on T1, T2 and T3 are visible; restore the rest from upstream.
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
// Predicate that enables the cblas_dgemv-based overloads of selectBlasAssignKernel() and
// selectBlasAddAssignKernel(); its negation is also part of UseDefaultKernel.
// NOTE(review): the initializer is truncated in this listing (source lines 167-169 are missing).
// Only the vectorizable checks on T1, T2 and T3 are visible; restore the rest from upstream.
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
// Predicate that enables the cblas_cgemv-based overloads of selectBlasAssignKernel() and
// selectBlasAddAssignKernel(); its negation is also part of UseDefaultKernel.
// NOTE(review): the initializer is truncated in this listing (source lines 184-186 are missing).
// Only the vectorizable checks are visible; the local typedef Type (complex<float>) is
// presumably used in the missing conditions — confirm against upstream.
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
// Predicate that enables the cblas_zgemv-based overloads of selectBlasAssignKernel() and
// selectBlasAddAssignKernel(); its negation is also part of UseDefaultKernel.
// NOTE(review): the initializer is truncated in this listing (source lines 201-203 are missing).
// Only the vectorizable checks are visible; the local typedef Type (complex<double>) is
// presumably used in the missing conditions — confirm against upstream.
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
// Predicate that enables the fallback overloads of selectBlasAssignKernel() and
// selectBlasAddAssignKernel(), which simply forward to the default kernels.
// The value is nonzero when BLAZE_BLAS_MODE is zero, or when none of the four BLAS predicates
// (single, double, single complex, double complex) holds for T1, T2, T3.
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
// Predicate that chooses between the scalar and the intrinsics-based overloads of
// selectDefaultAssignKernel() and selectDefaultAddAssignKernel().
// NOTE(review): the initializer is truncated in this listing (source lines 232-235 are missing).
// Only the vectorizable checks on T1, T2 and T3 are visible; restore the rest from upstream.
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are used here but their typedefs are not visible in
// this listing (source lines 242-246 are missing).
247  typedef const ElementType ReturnType;  // return type of operator[]
248  typedef const ResultType CompositeType;
249 
// Type returned by leftOperand(): SelectType picks between `const MT` and `const MT&` based on
// IsExpression<MT>::value — presumably by value when MT is itself an expression; confirm
// against SelectType.
251  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
252 
// Type returned by rightOperand(): the same choice for VT, based on IsExpression<VT>::value.
254  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
// Compilation flag named vectorizable.
// NOTE(review): the initializer is truncated in this listing (source lines 266-268 are missing).
// Only the requirement that both MT and VT are vectorizable is visible.
265  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
269 
// Compilation flag: nonzero only when neither evaluateMatrix nor evaluateVector is set, i.e. it
// is the exact negation of UseSMPAssignKernel::value.
271  enum { smpAssignable = !evaluateMatrix && !evaluateVector };
272  //**********************************************************************************************
273 
274  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//   mat  left-hand side dense matrix operand
//   vec  right-hand side dense vector operand
// The size match (mat.columns() == vec.size()) is checked only by an internal assertion.
280  explicit inline DMatDVecMultExpr( const MT& mat, const VT& vec )
281  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
282  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
// For columns() >= 1, end_ is the largest odd number <= columns(): operator[] handles column 0
// on its own, then columns [1,end_) in pairs, then column end_ if it exists. For columns() == 0
// the unsigned subtraction wraps around; operator[] does not read end_ in that case.
283  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
284  {
285  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
286  }
287  //**********************************************************************************************
288 
289  //**Subscript operator**************************************************************************
// Computes one element of the result vector: the dot product of matrix row `index` with the
// vector operand.
//   index  row of the matrix / position in the result; checked only by an internal assertion
// Returns the accumulated value, or a value passed through reset() when the matrix has no
// columns.
295  inline ReturnType operator[]( size_t index ) const {
296  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
297 
298  ElementType res;
299 
300  if( mat_.columns() != 0UL ) {
     // Column 0 initializes the accumulator; the loop then consumes columns two at a time.
301  res = mat_(index,0UL) * vec_[0UL];
302  for( size_t j=1UL; j<end_; j+=2UL ) {
303  res += mat_(index,j) * vec_[j] + mat_(index,j+1UL) * vec_[j+1UL];
304  }
     // With an even column count one column (index end_) is left over after the pairs.
305  if( end_ < mat_.columns() ) {
306  res += mat_(index,end_) * vec_[end_];
307  }
308  }
309  else {
310  reset( res );
311  }
312 
313  return res;
314  }
315  //**********************************************************************************************
316 
317  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of rows of the matrix operand.
322  inline size_t size() const {
323  return mat_.rows();
324  }
325  //**********************************************************************************************
326 
327  //**Left operand access*************************************************************************
// Returns the left-hand side dense matrix operand (mat_) as a LeftOperand.
332  inline LeftOperand leftOperand() const {
333  return mat_;
334  }
335  //**********************************************************************************************
336 
337  //**Right operand access************************************************************************
// Returns the right-hand side dense vector operand (vec_) as a RightOperand.
342  inline RightOperand rightOperand() const {
343  return vec_;
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//   alias  address to check
// Returns true if either operand reports isAliased( alias ). The check is identical to the one
// in isAliased().
353  template< typename T >
354  inline bool canAlias( const T* alias ) const {
355  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//   alias  address to check
// Returns true if either the matrix operand or the vector operand reports isAliased( alias ).
365  template< typename T >
366  inline bool isAliased( const T* alias ) const {
367  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
368  }
369  //**********************************************************************************************
370 
371  //**********************************************************************************************
// Returns true only if both the matrix operand and the vector operand report isAligned().
376  inline bool isAligned() const {
377  return mat_.isAligned() && vec_.isAligned();
378  }
379  //**********************************************************************************************
380 
381  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments.
// The visible part of the condition holds when BLAZE_BLAS_IS_PARALLEL is zero, or when the same
// test that selects the default (non-BLAS) kernel in selectAssignKernel() is true: MT is a
// computation that is not evaluated, or rows()*columns() is below DMATDVECMULT_THRESHOLD.
// NOTE(review): the return expression is truncated in this listing (source line 390 is missing);
// the final operand of the trailing && is not visible. Restore it from upstream.
386  inline bool canSMPAssign() const {
387  return ( !BLAZE_BLAS_IS_PARALLEL ||
388  ( IsComputation<MT>::value && !evaluateMatrix ) ||
389  ( mat_.rows() * mat_.columns() < DMATDVECMULT_THRESHOLD ) ) &&
391  }
392  //**********************************************************************************************
393 
394  private:
395  //**Member variables****************************************************************************
// End bound of the pairwise-unrolled loop in operator[]; computed once in the constructor.
// NOTE(review): the declarations of mat_ and vec_ are not visible in this listing (source lines
// 396-397 are missing).
398  const size_t end_;
399  //**********************************************************************************************
400 
401  //**Assignment to dense vectors*****************************************************************
// Assigns the dense matrix/dense vector product rhs to the dense vector lhs.
//   lhs  target dense vector; its size must equal rhs.size() (internal assertion only)
//   rhs  multiplication expression to evaluate
// Nothing is done for a matrix without rows; for a matrix without columns the target is reset.
// Otherwise both operands are materialized as LT/RT objects and the work is handed to
// selectAssignKernel().
// NOTE(review): the typedefs LT and RT are not visible in this listing, and source line 417 is
// missing from the body.
414  template< typename VT1 > // Type of the target dense vector
415  friend inline void assign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
416  {
418 
419  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
420 
421  if( rhs.mat_.rows() == 0UL ) {
422  return;
423  }
424  else if( rhs.mat_.columns() == 0UL ) {
425  reset( ~lhs );
426  return;
427  }
428 
429  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
430  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
431 
432  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
433  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
434  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
435  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
436 
437  DMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
438  }
440  //**********************************************************************************************
441 
442  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for y = A * x; this overload participates when UseSMPAssignKernel is not set.
//   y  target vector, A  matrix operand, x  vector operand
// The default kernel is used when MT is a computation that is not evaluated, or when the
// element count A.rows()*A.columns() is below DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel
// selection is used.
453  template< typename VT1 // Type of the left-hand side target vector
454  , typename MT1 // Type of the left-hand side matrix operand
455  , typename VT2 > // Type of the right-hand side vector operand
456  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
457  selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
461  DMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
462  else
463  DMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
464  }
466  //**********************************************************************************************
467 
468  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for y = A * x; this overload participates when UseSMPAssignKernel is set
// (evaluateMatrix or evaluateVector). It builds the product expression A * x from the
// already materialized operands and passes it to smpAssign().
//   y  target vector, A  matrix operand, x  vector operand
479  template< typename VT1 // Type of the left-hand side target vector
480  , typename MT1 // Type of the left-hand side matrix operand
481  , typename VT2 > // Type of the right-hand side vector operand
482  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
483  selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
484  {
485  smpAssign( y, A * x );
486  }
488  //**********************************************************************************************
489 
490  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for y = A * x, selected when UseVectorizedDefaultKernel does
// not hold. It forwards the product expression A * x to the target's own assign() member.
//   y  target vector, A  matrix operand, x  vector operand
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename MT1 // Type of the left-hand side matrix operand
506  , typename VT2 > // Type of the right-hand side vector operand
507  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
508  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
509  {
510  y.assign( A * x );
511  }
513  //**********************************************************************************************
514 
515  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for y = A * x, selected when UseVectorizedDefaultKernel holds.
//   y  target vector, A  matrix operand, x  vector operand
// Rows are processed in blocks of 8, then 4, 3 and 2, and finally a single leftover row. Within
// a block every row has its own accumulator; the column loop advances by IT::size, multiplies
// A.load(row,j) by x.load(j), and the accumulator is reduced with sum() and stored into y.
// NOTE(review): the accumulators are default-constructed and never explicitly zeroed here, so
// this relies on IntrinsicType's default constructor producing zero — confirm.
// NOTE(review): the column loop has no remainder handling; when N is not a multiple of IT::size
// the last load() reaches past column N-1 — presumably the operands are padded; confirm.
// NOTE(review): IntrinsicType is not declared in the visible part of this listing.
529  template< typename VT1 // Type of the left-hand side target vector
530  , typename MT1 // Type of the left-hand side matrix operand
531  , typename VT2 > // Type of the right-hand side vector operand
532  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
533  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
534  {
535  typedef IntrinsicTrait<ElementType> IT;
536 
537  const size_t M( A.rows() );
538  const size_t N( A.columns() );
539 
540  size_t i( 0UL );
541 
     // Blocks of eight rows: x.load(j) is fetched once and reused for all eight rows.
542  for( ; (i+8UL) <= M; i+=8UL ) {
543  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
544  for( size_t j=0UL; j<N; j+=IT::size ) {
545  const IntrinsicType x1( x.load(j) );
546  xmm1 = xmm1 + A.load(i ,j) * x1;
547  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
548  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
549  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
550  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
551  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
552  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
553  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
554  }
555  y[i ] = sum( xmm1 );
556  y[i+1UL] = sum( xmm2 );
557  y[i+2UL] = sum( xmm3 );
558  y[i+3UL] = sum( xmm4 );
559  y[i+4UL] = sum( xmm5 );
560  y[i+5UL] = sum( xmm6 );
561  y[i+6UL] = sum( xmm7 );
562  y[i+7UL] = sum( xmm8 );
563  }
     // Fewer than eight rows remain from here on, so each of the following loops runs at most once.
564  for( ; (i+4UL) <= M; i+=4UL ) {
565  IntrinsicType xmm1, xmm2, xmm3, xmm4;
566  for( size_t j=0UL; j<N; j+=IT::size ) {
567  const IntrinsicType x1( x.load(j) );
568  xmm1 = xmm1 + A.load(i ,j) * x1;
569  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
570  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
571  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
572  }
573  y[i ] = sum( xmm1 );
574  y[i+1UL] = sum( xmm2 );
575  y[i+2UL] = sum( xmm3 );
576  y[i+3UL] = sum( xmm4 );
577  }
578  for( ; (i+3UL) <= M; i+=3UL ) {
579  IntrinsicType xmm1, xmm2, xmm3;
580  for( size_t j=0UL; j<N; j+=IT::size ) {
581  const IntrinsicType x1( x.load(j) );
582  xmm1 = xmm1 + A.load(i ,j) * x1;
583  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
584  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
585  }
586  y[i ] = sum( xmm1 );
587  y[i+1UL] = sum( xmm2 );
588  y[i+2UL] = sum( xmm3 );
589  }
590  for( ; (i+2UL) <= M; i+=2UL ) {
591  IntrinsicType xmm1, xmm2;
592  for( size_t j=0UL; j<N; j+=IT::size ) {
593  const IntrinsicType x1( x.load(j) );
594  xmm1 = xmm1 + A.load(i ,j) * x1;
595  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
596  }
597  y[i ] = sum( xmm1 );
598  y[i+1UL] = sum( xmm2 );
599  }
     // At most one row is left at this point.
600  if( i < M ) {
601  IntrinsicType xmm1;
602  for( size_t j=0UL; j<N; j+=IT::size ) {
603  xmm1 = xmm1 + A.load(i,j) * x.load(j);
604  }
605  y[i] = sum( xmm1 );
606  }
607  }
609  //**********************************************************************************************
610 
611  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the BLAS kernel selection for y = A * x. It is enabled when
// UseDefaultKernel holds (no BLAS mode, or no matching BLAS element type) and forwards to
// selectDefaultAssignKernel().
//   y  target vector, A  matrix operand, x  vector operand
625  template< typename VT1 // Type of the left-hand side target vector
626  , typename MT1 // Type of the left-hand side matrix operand
627  , typename VT2 > // Type of the right-hand side vector operand
628  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
629  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
630  {
631  selectDefaultAssignKernel( y, A, x );
632  }
634  //**********************************************************************************************
635 
636  //**BLAS-based assignment to dense vectors (single precision)***********************************
637 #if BLAZE_BLAS_MODE
638 
// BLAS kernel for y = A * x in single precision, enabled by UseSinglePrecisionKernel.
//   y  target vector, A  matrix operand, x  vector operand
// Calls cblas_sgemv in row-major, non-transposed mode with alpha = 1 and beta = 0, so the
// previous contents of y do not contribute to the result. The row count, column count and
// leading dimension (A.spacing()) are converted to int with boost::numeric_cast, which rejects
// values that do not fit. Both vectors are passed with stride 1.
// NOTE(review): source lines 659-661 are missing from the body in this listing.
651  template< typename VT1 // Type of the left-hand side target vector
652  , typename MT1 // Type of the left-hand side matrix operand
653  , typename VT2 > // Type of the right-hand side vector operand
654  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
655  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
656  {
657  using boost::numeric_cast;
658 
662 
663  const int M ( numeric_cast<int>( A.rows() ) );
664  const int N ( numeric_cast<int>( A.columns() ) );
665  const int lda( numeric_cast<int>( A.spacing() ) );
666 
667  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
668  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
669  }
671 #endif
672  //**********************************************************************************************
673 
674  //**BLAS-based assignment to dense vectors (double precision)***********************************
675 #if BLAZE_BLAS_MODE
676 
// BLAS kernel for y = A * x in double precision, enabled by UseDoublePrecisionKernel.
//   y  target vector, A  matrix operand, x  vector operand
// Calls cblas_dgemv in row-major, non-transposed mode with alpha = 1 and beta = 0, so the
// previous contents of y do not contribute to the result. The row count, column count and
// leading dimension (A.spacing()) are converted to int with boost::numeric_cast, which rejects
// values that do not fit. Both vectors are passed with stride 1.
// NOTE(review): source lines 697-699 are missing from the body in this listing.
689  template< typename VT1 // Type of the left-hand side target vector
690  , typename MT1 // Type of the left-hand side matrix operand
691  , typename VT2 > // Type of the right-hand side vector operand
692  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
693  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
694  {
695  using boost::numeric_cast;
696 
700 
701  const int M ( numeric_cast<int>( A.rows() ) );
702  const int N ( numeric_cast<int>( A.columns() ) );
703  const int lda( numeric_cast<int>( A.spacing() ) );
704 
705  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
706  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
707  }
709 #endif
710  //**********************************************************************************************
711 
712  //**BLAS-based assignment to dense vectors (single precision complex)***************************
713 #if BLAZE_BLAS_MODE
714 
// BLAS kernel for y = A * x with single precision complex elements, enabled by
// UseSinglePrecisionComplexKernel.
//   y  target vector, A  matrix operand, x  vector operand
// The value_type of each operand's element type is constrained to float. Calls cblas_cgemv in
// row-major, non-transposed mode with alpha = (1,0) and beta = (0,0), both passed by address,
// so the previous contents of y do not contribute to the result. Sizes and the leading
// dimension (A.spacing()) are converted to int with boost::numeric_cast.
// NOTE(review): source lines 735-737 are missing from the body in this listing.
727  template< typename VT1 // Type of the left-hand side target vector
728  , typename MT1 // Type of the left-hand side matrix operand
729  , typename VT2 > // Type of the right-hand side vector operand
730  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
731  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
732  {
733  using boost::numeric_cast;
734 
738  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
739  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
740  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
741 
742  const int M ( numeric_cast<int>( A.rows() ) );
743  const int N ( numeric_cast<int>( A.columns() ) );
744  const int lda( numeric_cast<int>( A.spacing() ) );
745  const complex<float> alpha( 1.0F, 0.0F );
746  const complex<float> beta ( 0.0F, 0.0F );
747 
748  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
749  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
750  }
752 #endif
753  //**********************************************************************************************
754 
755  //**BLAS-based assignment to dense vectors (double precision complex)***************************
756 #if BLAZE_BLAS_MODE
757 
// BLAS kernel for y = A * x with double precision complex elements, enabled by
// UseDoublePrecisionComplexKernel.
//   y  target vector, A  matrix operand, x  vector operand
// The value_type of each operand's element type is constrained to double. Calls cblas_zgemv in
// row-major, non-transposed mode with alpha = (1,0) and beta = (0,0), both passed by address,
// so the previous contents of y do not contribute to the result. Sizes and the leading
// dimension (A.spacing()) are converted to int with boost::numeric_cast.
// NOTE(review): source lines 778-780 are missing from the body in this listing.
770  template< typename VT1 // Type of the left-hand side target vector
771  , typename MT1 // Type of the left-hand side matrix operand
772  , typename VT2 > // Type of the right-hand side vector operand
773  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
774  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
775  {
776  using boost::numeric_cast;
777 
781  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
782  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
783  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
784 
785  const int M ( numeric_cast<int>( A.rows() ) );
786  const int N ( numeric_cast<int>( A.columns() ) );
787  const int lda( numeric_cast<int>( A.spacing() ) );
788  const complex<double> alpha( 1.0, 0.0 );
789  const complex<double> beta ( 0.0, 0.0 );
790 
791  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
792  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
793  }
795 #endif
796  //**********************************************************************************************
797 
798  //**Assignment to sparse vectors****************************************************************
// Assigns the dense matrix/dense vector product rhs to the sparse vector lhs.
//   lhs  target sparse vector; its size must equal rhs.size() (internal assertion only)
//   rhs  multiplication expression to evaluate
// The expression is first evaluated into a temporary of type ResultType, which is then handed
// to smpAssign().
// NOTE(review): source lines 814 and 816-818 are missing from the body in this listing.
811  template< typename VT1 > // Type of the target sparse vector
812  friend inline void assign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
813  {
815 
819 
820  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
821 
822  const ResultType tmp( rhs );
823  smpAssign( ~lhs, tmp );
824  }
826  //**********************************************************************************************
827 
828  //**Addition assignment to dense vectors********************************************************
// Adds the dense matrix/dense vector product rhs to the dense vector lhs.
//   lhs  target dense vector; its size must equal rhs.size() (internal assertion only)
//   rhs  multiplication expression to evaluate
// Returns without touching lhs when the matrix has no rows or no columns. Otherwise both
// operands are materialized as LT/RT objects and the work is handed to
// selectAddAssignKernel().
// NOTE(review): the typedefs LT and RT are not visible in this listing, and source line 844 is
// missing from the body.
841  template< typename VT1 > // Type of the target dense vector
842  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
843  {
845 
846  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
847 
848  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
849  return;
850  }
851 
852  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
853  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
854 
855  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
856  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
857  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
858  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
859 
860  DMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
861  }
863  //**********************************************************************************************
864 
865  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for y += A * x; this overload participates when UseSMPAssignKernel is not
// set.
//   y  target vector, A  matrix operand, x  vector operand
// The default kernel is used when MT is a computation that is not evaluated, or when the
// element count A.rows()*A.columns() is below DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel
// selection is used.
876  template< typename VT1 // Type of the left-hand side target vector
877  , typename MT1 // Type of the left-hand side matrix operand
878  , typename VT2 > // Type of the right-hand side vector operand
879  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
880  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
881  {
882  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
883  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
884  DMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
885  else
886  DMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
887  }
889  //**********************************************************************************************
890 
891  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for y += A * x; this overload participates when UseSMPAssignKernel is set
// (evaluateMatrix or evaluateVector). It builds the product expression A * x from the
// already materialized operands and passes it to smpAddAssign().
//   y  target vector, A  matrix operand, x  vector operand
902  template< typename VT1 // Type of the left-hand side target vector
903  , typename MT1 // Type of the left-hand side matrix operand
904  , typename VT2 > // Type of the right-hand side vector operand
905  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
906  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
907  {
908  smpAddAssign( y, A * x );
909  }
911  //**********************************************************************************************
912 
913  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) kernel for y += A * x, selected when UseVectorizedDefaultKernel does
// not hold. It forwards the product expression A * x to the target's own addAssign() member.
//   y  target vector, A  matrix operand, x  vector operand
927  template< typename VT1 // Type of the left-hand side target vector
928  , typename MT1 // Type of the left-hand side matrix operand
929  , typename VT2 > // Type of the right-hand side vector operand
930  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
931  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
932  {
933  y.addAssign( A * x );
934  }
936  //**********************************************************************************************
937 
938  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for y += A * x, selected when UseVectorizedDefaultKernel holds.
//   y  target vector, A  matrix operand, x  vector operand
// Rows are processed in blocks of 8, then 4, 3 and 2, and finally a single leftover row. Within
// a block every row has its own accumulator; the column loop advances by IT::size, multiplies
// A.load(row,j) by x.load(j), and the reduced accumulator (sum()) is added onto the existing
// element of y.
// NOTE(review): the accumulators are default-constructed and never explicitly zeroed here, so
// this relies on IntrinsicType's default constructor producing zero — confirm.
// NOTE(review): the column loop has no remainder handling; when N is not a multiple of IT::size
// the last load() reaches past column N-1 — presumably the operands are padded; confirm.
// NOTE(review): IntrinsicType is not declared in the visible part of this listing.
952  template< typename VT1 // Type of the left-hand side target vector
953  , typename MT1 // Type of the left-hand side matrix operand
954  , typename VT2 > // Type of the right-hand side vector operand
955  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
956  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
957  {
958  typedef IntrinsicTrait<ElementType> IT;
959 
960  const size_t M( A.rows() );
961  const size_t N( A.columns() );
962 
963  size_t i( 0UL );
964 
     // Blocks of eight rows: x.load(j) is fetched once and reused for all eight rows.
965  for( ; (i+8UL) <= M; i+=8UL ) {
966  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
967  for( size_t j=0UL; j<N; j+=IT::size ) {
968  const IntrinsicType x1( x.load(j) );
969  xmm1 = xmm1 + A.load(i ,j) * x1;
970  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
971  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
972  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
973  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
974  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
975  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
976  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
977  }
978  y[i ] += sum( xmm1 );
979  y[i+1UL] += sum( xmm2 );
980  y[i+2UL] += sum( xmm3 );
981  y[i+3UL] += sum( xmm4 );
982  y[i+4UL] += sum( xmm5 );
983  y[i+5UL] += sum( xmm6 );
984  y[i+6UL] += sum( xmm7 );
985  y[i+7UL] += sum( xmm8 );
986  }
     // Fewer than eight rows remain from here on, so each of the following loops runs at most once.
987  for( ; (i+4UL) <= M; i+=4UL ) {
988  IntrinsicType xmm1, xmm2, xmm3, xmm4;
989  for( size_t j=0UL; j<N; j+=IT::size ) {
990  const IntrinsicType x1( x.load(j) );
991  xmm1 = xmm1 + A.load(i ,j) * x1;
992  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
993  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
994  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
995  }
996  y[i ] += sum( xmm1 );
997  y[i+1UL] += sum( xmm2 );
998  y[i+2UL] += sum( xmm3 );
999  y[i+3UL] += sum( xmm4 );
1000  }
1001  for( ; (i+3UL) <= M; i+=3UL ) {
1002  IntrinsicType xmm1, xmm2, xmm3;
1003  for( size_t j=0UL; j<N; j+=IT::size ) {
1004  const IntrinsicType x1( x.load(j) );
1005  xmm1 = xmm1 + A.load(i ,j) * x1;
1006  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1007  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1008  }
1009  y[i ] += sum( xmm1 );
1010  y[i+1UL] += sum( xmm2 );
1011  y[i+2UL] += sum( xmm3 );
1012  }
1013  for( ; (i+2UL) <= M; i+=2UL ) {
1014  IntrinsicType xmm1, xmm2;
1015  for( size_t j=0UL; j<N; j+=IT::size ) {
1016  const IntrinsicType x1( x.load(j) );
1017  xmm1 = xmm1 + A.load(i ,j) * x1;
1018  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1019  }
1020  y[i ] += sum( xmm1 );
1021  y[i+1UL] += sum( xmm2 );
1022  }
      // At most one row is left at this point.
1023  if( i < M ) {
1024  IntrinsicType xmm1;
1025  for( size_t j=0UL; j<N; j+=IT::size ) {
1026  xmm1 = xmm1 + A.load(i,j) * x.load(j);
1027  }
1028  y[i] += sum( xmm1 );
1029  }
1030  }
1032  //**********************************************************************************************
1033 
1034  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS kernel selection for y += A * x. It is enabled when
// UseDefaultKernel holds (no BLAS mode, or no matching BLAS element type) and forwards to
// selectDefaultAddAssignKernel().
//   y  target vector, A  matrix operand, x  vector operand
1048  template< typename VT1 // Type of the left-hand side target vector
1049  , typename MT1 // Type of the left-hand side matrix operand
1050  , typename VT2 > // Type of the right-hand side vector operand
1051  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1052  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1053  {
1054  selectDefaultAddAssignKernel( y, A, x );
1055  }
1057  //**********************************************************************************************
1058 
1059  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1060 #if BLAZE_BLAS_MODE
1061 
// BLAS kernel for y += A * x in single precision, enabled by UseSinglePrecisionKernel.
//   y  target vector, A  matrix operand, x  vector operand
// Calls cblas_sgemv in row-major, non-transposed mode with alpha = 1 and beta = 1, so the
// product is accumulated onto the existing contents of y. The row count, column count and
// leading dimension (A.spacing()) are converted to int with boost::numeric_cast, which rejects
// values that do not fit. Both vectors are passed with stride 1.
// NOTE(review): source lines 1082-1084 are missing from the body in this listing.
1074  template< typename VT1 // Type of the left-hand side target vector
1075  , typename MT1 // Type of the left-hand side matrix operand
1076  , typename VT2 > // Type of the right-hand side vector operand
1077  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1078  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1079  {
1080  using boost::numeric_cast;
1081 
1085 
1086  const int M ( numeric_cast<int>( A.rows() ) );
1087  const int N ( numeric_cast<int>( A.columns() ) );
1088  const int lda( numeric_cast<int>( A.spacing() ) );
1089 
1090  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
1091  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1092  }
1094 #endif
1095  //**********************************************************************************************
1096 
1097  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1098 #if BLAZE_BLAS_MODE
1099 
// BLAS kernel for y += A * x in double precision, enabled by UseDoublePrecisionKernel.
//   y  target vector, A  matrix operand, x  vector operand
// Calls cblas_dgemv in row-major, non-transposed mode with alpha = 1 and beta = 1, so the
// product is accumulated onto the existing contents of y. The row count, column count and
// leading dimension (A.spacing()) are converted to int with boost::numeric_cast, which rejects
// values that do not fit. Both vectors are passed with stride 1.
// NOTE(review): source lines 1120-1122 are missing from the body in this listing.
1112  template< typename VT1 // Type of the left-hand side target vector
1113  , typename MT1 // Type of the left-hand side matrix operand
1114  , typename VT2 > // Type of the right-hand side vector operand
1115  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1116  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1117  {
1118  using boost::numeric_cast;
1119 
1123 
1124  const int M ( numeric_cast<int>( A.rows() ) );
1125  const int N ( numeric_cast<int>( A.columns() ) );
1126  const int lda( numeric_cast<int>( A.spacing() ) );
1127 
1128  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
1129  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1130  }
1132 #endif
1133  //**********************************************************************************************
1134 
1135  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1136 #if BLAZE_BLAS_MODE
1137 
// BLAS kernel for y += A * x with single precision complex elements, enabled by
// UseSinglePrecisionComplexKernel.
//   y  target vector, A  matrix operand, x  vector operand
// The value_type of each operand's element type is constrained to float. Calls cblas_cgemv in
// row-major, non-transposed mode with alpha = (1,0) and beta = (1,0), both passed by address,
// so the product is accumulated onto the existing contents of y. Sizes and the leading
// dimension (A.spacing()) are converted to int with boost::numeric_cast.
// NOTE(review): source lines 1158-1160 are missing from the body in this listing.
1150  template< typename VT1 // Type of the left-hand side target vector
1151  , typename MT1 // Type of the left-hand side matrix operand
1152  , typename VT2 > // Type of the right-hand side vector operand
1153  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1154  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1155  {
1156  using boost::numeric_cast;
1157 
1161  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1162  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1163  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1164 
1165  const int M ( numeric_cast<int>( A.rows() ) );
1166  const int N ( numeric_cast<int>( A.columns() ) );
1167  const int lda( numeric_cast<int>( A.spacing() ) );
1168  const complex<float> alpha( 1.0F, 0.0F );
1169  const complex<float> beta ( 1.0F, 0.0F );
1170 
1171  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1172  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1173  }
1175 #endif
1176  //**********************************************************************************************
1177 
1178  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1179 #if BLAZE_BLAS_MODE
1180 
1193  template< typename VT1 // Type of the left-hand side target vector
1194  , typename MT1 // Type of the left-hand side matrix operand
1195  , typename VT2 > // Type of the right-hand side vector operand
1196  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1197  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1198  {
1199  using boost::numeric_cast;
1200 
1204  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1205  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1206  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1207 
1208  const int M ( numeric_cast<int>( A.rows() ) );
1209  const int N ( numeric_cast<int>( A.columns() ) );
1210  const int lda( numeric_cast<int>( A.spacing() ) );
1211  const complex<double> alpha( 1.0, 0.0 );
1212  const complex<double> beta ( 1.0, 0.0 );
1213 
1214  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1215  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1216  }
1218 #endif
1219  //**********************************************************************************************
1220 
1221  //**Addition assignment to sparse vectors*******************************************************
1222  // No special implementation for the addition assignment to sparse vectors.
1223  //**********************************************************************************************
1224 
1225  //**Subtraction assignment to dense vectors*****************************************************
1238  template< typename VT1 > // Type of the target dense vector
1239  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1240  {
1242 
1243  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1244 
1245  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1246  return;
1247  }
1248 
1249  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1250  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1251 
1252  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1253  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1254  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1255  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1256 
1257  DMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1258  }
1260  //**********************************************************************************************
1261 
1262  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1273  template< typename VT1 // Type of the left-hand side target vector
1274  , typename MT1 // Type of the left-hand side matrix operand
1275  , typename VT2 > // Type of the right-hand side vector operand
1276  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
1277  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1278  {
1279  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1280  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1281  DMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1282  else
1283  DMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
1284  }
1286  //**********************************************************************************************
1287 
1288  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1299  template< typename VT1 // Type of the left-hand side target vector
1300  , typename MT1 // Type of the left-hand side matrix operand
1301  , typename VT2 > // Type of the right-hand side vector operand
1302  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2> >::Type
1303  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1304  {
1305  smpSubAssign( y, A * x );
1306  }
1308  //**********************************************************************************************
1309 
1310  //**Default subtraction assignment to dense vectors*********************************************
1324  template< typename VT1 // Type of the left-hand side target vector
1325  , typename MT1 // Type of the left-hand side matrix operand
1326  , typename VT2 > // Type of the right-hand side vector operand
1327  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1328  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1329  {
1330  y.subAssign( A * x );
1331  }
1333  //**********************************************************************************************
1334 
1335  //**Vectorized default subtraction assignment to dense vectors**********************************
1349  template< typename VT1 // Type of the left-hand side target vector
1350  , typename MT1 // Type of the left-hand side matrix operand
1351  , typename VT2 > // Type of the right-hand side vector operand
1352  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1353  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1354  {
1355  typedef IntrinsicTrait<ElementType> IT;
1356 
1357  const size_t M( A.rows() );
1358  const size_t N( A.columns() );
1359 
1360  size_t i( 0UL );
1361 
1362  for( ; (i+8UL) <= M; i+=8UL ) {
1363  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1364  for( size_t j=0UL; j<N; j+=IT::size ) {
1365  const IntrinsicType x1( x.load(j) );
1366  xmm1 = xmm1 + A.load(i ,j) * x1;
1367  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1368  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1369  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1370  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1371  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1372  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1373  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
1374  }
1375  y[i ] -= sum( xmm1 );
1376  y[i+1UL] -= sum( xmm2 );
1377  y[i+2UL] -= sum( xmm3 );
1378  y[i+3UL] -= sum( xmm4 );
1379  y[i+4UL] -= sum( xmm5 );
1380  y[i+5UL] -= sum( xmm6 );
1381  y[i+6UL] -= sum( xmm7 );
1382  y[i+7UL] -= sum( xmm8 );
1383  }
1384  for( ; (i+4UL) <= M; i+=4UL ) {
1385  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1386  for( size_t j=0UL; j<N; j+=IT::size ) {
1387  const IntrinsicType x1( x.load(j) );
1388  xmm1 = xmm1 + A.load(i ,j) * x1;
1389  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1390  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1391  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1392  }
1393  y[i ] -= sum( xmm1 );
1394  y[i+1UL] -= sum( xmm2 );
1395  y[i+2UL] -= sum( xmm3 );
1396  y[i+3UL] -= sum( xmm4 );
1397  }
1398  for( ; (i+3UL) <= M; i+=3UL ) {
1399  IntrinsicType xmm1, xmm2, xmm3;
1400  for( size_t j=0UL; j<N; j+=IT::size ) {
1401  const IntrinsicType x1( x.load(j) );
1402  xmm1 = xmm1 + A.load(i ,j) * x1;
1403  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1404  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1405  }
1406  y[i ] -= sum( xmm1 );
1407  y[i+1UL] -= sum( xmm2 );
1408  y[i+2UL] -= sum( xmm3 );
1409  }
1410  for( ; (i+2UL) <= M; i+=2UL ) {
1411  IntrinsicType xmm1, xmm2;
1412  for( size_t j=0UL; j<N; j+=IT::size ) {
1413  const IntrinsicType x1( x.load(j) );
1414  xmm1 = xmm1 + A.load(i ,j) * x1;
1415  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1416  }
1417  y[i ] -= sum( xmm1 );
1418  y[i+1UL] -= sum( xmm2 );
1419  }
1420  if( i < M ) {
1421  IntrinsicType xmm1;
1422  for( size_t j=0UL; j<N; j+=IT::size ) {
1423  xmm1 = xmm1 + A.load(i,j) * x.load(j);
1424  }
1425  y[i] -= sum( xmm1 );
1426  }
1427  }
1429  //**********************************************************************************************
1430 
1431  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1445  template< typename VT1 // Type of the left-hand side target vector
1446  , typename MT1 // Type of the left-hand side matrix operand
1447  , typename VT2 > // Type of the right-hand side vector operand
1448  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1449  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1450  {
1451  selectDefaultSubAssignKernel( y, A, x );
1452  }
1454  //**********************************************************************************************
1455 
1456  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1457 #if BLAZE_BLAS_MODE
1458 
1471  template< typename VT1 // Type of the left-hand side target vector
1472  , typename MT1 // Type of the left-hand side matrix operand
1473  , typename VT2 > // Type of the right-hand side vector operand
1474  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1475  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1476  {
1477  using boost::numeric_cast;
1478 
1482 
1483  const int M ( numeric_cast<int>( A.rows() ) );
1484  const int N ( numeric_cast<int>( A.columns() ) );
1485  const int lda( numeric_cast<int>( A.spacing() ) );
1486 
1487  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1488  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1489  }
1491 #endif
1492  //**********************************************************************************************
1493 
1494  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1495 #if BLAZE_BLAS_MODE
1496 
1509  template< typename VT1 // Type of the left-hand side target vector
1510  , typename MT1 // Type of the left-hand side matrix operand
1511  , typename VT2 > // Type of the right-hand side vector operand
1512  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1513  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1514  {
1515  using boost::numeric_cast;
1516 
1520 
1521  const int M ( numeric_cast<int>( A.rows() ) );
1522  const int N ( numeric_cast<int>( A.columns() ) );
1523  const int lda( numeric_cast<int>( A.spacing() ) );
1524 
1525  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1526  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1527  }
1529 #endif
1530  //**********************************************************************************************
1531 
1532  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1533 #if BLAZE_BLAS_MODE
1534 
1547  template< typename VT1 // Type of the left-hand side target vector
1548  , typename MT1 // Type of the left-hand side matrix operand
1549  , typename VT2 > // Type of the right-hand side vector operand
1550  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1551  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1552  {
1553  using boost::numeric_cast;
1554 
1558  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1559  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1560  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1561 
1562  const int M ( numeric_cast<int>( A.rows() ) );
1563  const int N ( numeric_cast<int>( A.columns() ) );
1564  const int lda( numeric_cast<int>( A.spacing() ) );
1565  const complex<float> alpha( -1.0F, 0.0F );
1566  const complex<float> beta ( 1.0F, 0.0F );
1567 
1568  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1569  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1570  }
1572 #endif
1573  //**********************************************************************************************
1574 
1575  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1576 #if BLAZE_BLAS_MODE
1577 
1590  template< typename VT1 // Type of the left-hand side target vector
1591  , typename MT1 // Type of the left-hand side matrix operand
1592  , typename VT2 > // Type of the right-hand side vector operand
1593  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1594  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1595  {
1596  using boost::numeric_cast;
1597 
1601  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1602  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1603  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1604 
1605  const int M ( numeric_cast<int>( A.rows() ) );
1606  const int N ( numeric_cast<int>( A.columns() ) );
1607  const int lda( numeric_cast<int>( A.spacing() ) );
1608  const complex<double> alpha( -1.0, 0.0 );
1609  const complex<double> beta ( 1.0, 0.0 );
1610 
1611  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1612  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1613  }
1615 #endif
1616  //**********************************************************************************************
1617 
1618  //**Subtraction assignment to sparse vectors****************************************************
1619  // No special implementation for the subtraction assignment to sparse vectors.
1620  //**********************************************************************************************
1621 
1622  //**Multiplication assignment to dense vectors**************************************************
1635  template< typename VT1 > // Type of the target dense vector
1636  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1637  {
1639 
1643 
1644  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1645 
1646  const ResultType tmp( rhs );
1647  multAssign( ~lhs, tmp );
1648  }
1650  //**********************************************************************************************
1651 
1652  //**Multiplication assignment to sparse vectors*************************************************
1653  // No special implementation for the multiplication assignment to sparse vectors.
1654  //**********************************************************************************************
1655 
1656  //**Compile time checks*************************************************************************
1663  //**********************************************************************************************
1664 };
1665 //*************************************************************************************************
1666 
1667 
1668 
1669 
1670 //=================================================================================================
1671 //
1672 // DVECSCALARMULTEXPR SPECIALIZATION
1673 //
1674 //=================================================================================================
1675 
1676 //*************************************************************************************************
1684 template< typename MT // Type of the left-hand side dense matrix
1685  , typename VT // Type of the right-hand side dense vector
1686  , typename ST > // Type of the scalar value
1687 class DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >
1688  : public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1689  , private VecScalarMultExpr
1690  , private Computation
1691 {
1692  private:
1693  //**Type definitions****************************************************************************
1694  typedef DMatDVecMultExpr<MT,VT> MVM; //!< Type of the dense matrix-dense vector multiplication expression.
1695  typedef typename MVM::ResultType RES; //!< Result type of the dense matrix-dense vector multiplication expression.
1696  typedef typename MT::ResultType MRT; //!< Result type of the left-hand side dense matrix expression.
1697  typedef typename VT::ResultType VRT; //!< Result type of the right-hand side dense vector expression.
1698  typedef typename MRT::ElementType MET; //!< Element type of the left-hand side dense matrix expression.
1699  typedef typename VRT::ElementType VET; //!< Element type of the right-hand side dense vector expression.
1700  typedef typename MT::CompositeType MCT; //!< Composite type of the left-hand side dense matrix expression.
1701  typedef typename VT::CompositeType VCT; //!< Composite type of the right-hand side dense vector expression.
1702  //**********************************************************************************************
1703 
1704  //**********************************************************************************************
1706  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1707  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1708  //**********************************************************************************************
1709 
1710  //**********************************************************************************************
1712  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<MT>::value };
1713  //**********************************************************************************************
1714 
1715  //**********************************************************************************************
1717 
1719  template< typename T1, typename T2, typename T3, typename T4 >
1720  struct UseSMPAssignKernel {
1721  enum { value = evaluateMatrix || evaluateVector };
1722  };
1723  //**********************************************************************************************
1724 
1725  //**********************************************************************************************
1727 
1730  template< typename T1, typename T2, typename T3, typename T4 >
1731  struct UseSinglePrecisionKernel {
1732  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1733  IsFloat<typename T1::ElementType>::value &&
1734  IsFloat<typename T2::ElementType>::value &&
1735  IsFloat<typename T3::ElementType>::value &&
1736  !IsComplex<T4>::value };
1737  };
1738  //**********************************************************************************************
1739 
1740  //**********************************************************************************************
1742 
1745  template< typename T1, typename T2, typename T3, typename T4 >
1746  struct UseDoublePrecisionKernel {
1747  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1748  IsDouble<typename T1::ElementType>::value &&
1749  IsDouble<typename T2::ElementType>::value &&
1750  IsDouble<typename T3::ElementType>::value &&
1751  !IsComplex<T4>::value };
1752  };
1753  //**********************************************************************************************
1754 
1755  //**********************************************************************************************
1757 
1760  template< typename T1, typename T2, typename T3 >
1761  struct UseSinglePrecisionComplexKernel {
1762  typedef complex<float> Type;
1763  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1764  IsSame<typename T1::ElementType,Type>::value &&
1765  IsSame<typename T2::ElementType,Type>::value &&
1766  IsSame<typename T3::ElementType,Type>::value };
1767  };
1768  //**********************************************************************************************
1769 
1770  //**********************************************************************************************
1772 
1775  template< typename T1, typename T2, typename T3 >
1776  struct UseDoublePrecisionComplexKernel {
1777  typedef complex<double> Type;
1778  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1779  IsSame<typename T1::ElementType,Type>::value &&
1780  IsSame<typename T2::ElementType,Type>::value &&
1781  IsSame<typename T3::ElementType,Type>::value };
1782  };
1783  //**********************************************************************************************
1784 
1785  //**********************************************************************************************
1787 
1789  template< typename T1, typename T2, typename T3, typename T4 >
1790  struct UseDefaultKernel {
1791  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1792  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1793  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1794  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1795  };
1796  //**********************************************************************************************
1797 
1798  //**********************************************************************************************
1800 
1803  template< typename T1, typename T2, typename T3, typename T4 >
1804  struct UseVectorizedDefaultKernel {
1805  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1806  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1807  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1808  IsSame<typename T1::ElementType,T4>::value &&
1809  IntrinsicTrait<typename T1::ElementType>::addition &&
1810  IntrinsicTrait<typename T1::ElementType>::multiplication };
1811  };
1812  //**********************************************************************************************
1813 
1814  public:
1815  //**Type definitions****************************************************************************
1816  typedef DVecScalarMultExpr<MVM,ST,false> This; //!< Type of this DVecScalarMultExpr specialization.
1817  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type of the scaled product, as provided by MultTrait<RES,ST>.
1818  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type taken from ResultType.
1819  typedef typename ResultType::ElementType ElementType; //!< Element type taken from ResultType.
1820  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type for ElementType; used for the accumulators of the vectorized kernels.
1821  typedef const ElementType ReturnType; //!< Return type of the subscript operator.
1822  typedef const ResultType CompositeType; //!< Constant ResultType.
1823 
1825  typedef const DMatDVecMultExpr<MT,VT> LeftOperand; //!< Type of the left-hand side operand (the matrix/vector product expression).
1826 
1828  typedef ST RightOperand; //!< Type of the right-hand side operand (the scalar value).
1829 
1831  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT; //!< Type holding the matrix operand during assignment: const MRT or MCT, chosen by evaluateMatrix (presumably MRT when the switch is set - confirm against SelectType).
1832 
1834  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT; //!< Type holding the vector operand during assignment: const VRT or VCT, chosen by evaluateVector (presumably VRT when the switch is set - confirm against SelectType).
1835  //**********************************************************************************************
1836 
1837  //**Compilation flags***************************************************************************
1839  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
1840  IsSame<MET,VET>::value &&
1841  IsSame<MET,ST>::value &&
1842  IntrinsicTrait<MET>::addition &&
1843  IntrinsicTrait<MET>::multiplication };
1844 
1846  enum { smpAssignable = !evaluateMatrix && !evaluateVector };
1847  //**********************************************************************************************
1848 
1849  //**Constructor*********************************************************************************
1855  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
1856  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1857  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1858  {}
1859  //**********************************************************************************************
1860 
1861  //**Subscript operator**************************************************************************
1867  inline ReturnType operator[]( size_t index ) const {
1868  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1869  return vector_[index] * scalar_;
1870  }
1871  //**********************************************************************************************
1872 
1873  //**Size function*******************************************************************************
1878  inline size_t size() const {
1879  return vector_.size();
1880  }
1881  //**********************************************************************************************
1882 
1883  //**Left operand access*************************************************************************
1888  inline LeftOperand leftOperand() const {
1889  return vector_;
1890  }
1891  //**********************************************************************************************
1892 
1893  //**Right operand access************************************************************************
1898  inline RightOperand rightOperand() const {
1899  return scalar_;
1900  }
1901  //**********************************************************************************************
1902 
1903  //**********************************************************************************************
1909  template< typename T >
1910  inline bool canAlias( const T* alias ) const {
1911  return vector_.canAlias( alias );
1912  }
1913  //**********************************************************************************************
1914 
1915  //**********************************************************************************************
1921  template< typename T >
1922  inline bool isAliased( const T* alias ) const {
1923  return vector_.isAliased( alias );
1924  }
1925  //**********************************************************************************************
1926 
1927  //**********************************************************************************************
1932  inline bool isAligned() const {
1933  return vector_.isAligned();
1934  }
1935  //**********************************************************************************************
1936 
1937  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
//
// The visible part of the condition holds if BLAS is not parallel, if the matrix operand is a
// computation that is not evaluated, or if the number of matrix elements is below
// DMATDVECMULT_THRESHOLD.
//
// NOTE(review): the return statement is incomplete in this listing. It ends with a dangling
// '&&' and listing line 1947, which held the right-hand operand and the terminating ';', is
// missing. The operand cannot be determined from the visible code and must be restored from
// the original header before this function can compile.
1942  inline bool canSMPAssign() const {
1943  typename MVM::LeftOperand A( vector_.leftOperand() );
1944  return ( !BLAZE_BLAS_IS_PARALLEL ||
1945  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1946  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) ) &&
1948  }
1949  //**********************************************************************************************
1950 
1951  private:
1952  //**Member variables****************************************************************************
1953  LeftOperand vector_; //!< Left-hand side operand: the matrix/vector product expression that is scaled.
1954  RightOperand scalar_; //!< Right-hand side operand: the scalar factor.
1955  //**********************************************************************************************
1956 
1957  //**Assignment to dense vectors*****************************************************************
1969  template< typename VT1 > // Type of the target dense vector
1970  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
1971  {
1973 
1974  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1975 
1976  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1977  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1978 
1979  if( left.rows() == 0UL ) {
1980  return;
1981  }
1982  else if( left.columns() == 0UL ) {
1983  reset( ~lhs );
1984  return;
1985  }
1986 
1987  LT A( left ); // Evaluation of the left-hand side dense matrix operand
1988  RT x( right ); // Evaluation of the right-hand side dense vector operand
1989 
1990  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
1991  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
1992  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
1993  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1994 
1995  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
1996  }
1997  //**********************************************************************************************
1998 
1999  //**Assignment to dense vectors (kernel selection)**********************************************
2010  template< typename VT1 // Type of the left-hand side target vector
2011  , typename MT1 // Type of the left-hand side matrix operand
2012  , typename VT2 // Type of the right-hand side vector operand
2013  , typename ST2 > // Type of the scalar value
2014  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2015  selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2016  {
2017  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2018  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2019  DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2020  else
2021  DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
2022  }
2023  //**********************************************************************************************
2024 
2025  //**Assignment to dense vectors (kernel selection)**********************************************
2036  template< typename VT1 // Type of the left-hand side target vector
2037  , typename MT1 // Type of the left-hand side matrix operand
2038  , typename VT2 // Type of the right-hand side vector operand
2039  , typename ST2 > // Type of the scalar value
2040  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2041  selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2042  {
2043  smpAssign( y, A * x * scalar );
2044  }
2045  //**********************************************************************************************
2046 
2047  //**Default assignment to dense vectors*********************************************************
2061  template< typename VT1 // Type of the left-hand side target vector
2062  , typename MT1 // Type of the left-hand side matrix operand
2063  , typename VT2 // Type of the right-hand side vector operand
2064  , typename ST2 > // Type of the scalar value
2065  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2066  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2067  {
2068  y.assign( A * x * scalar );
2069  }
2070  //**********************************************************************************************
2071 
2072  //**Vectorized default assignment to dense vectors**********************************************
2086  template< typename VT1 // Type of the left-hand side target vector
2087  , typename MT1 // Type of the left-hand side matrix operand
2088  , typename VT2 // Type of the right-hand side vector operand
2089  , typename ST2 > // Type of the scalar value
2090  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2091  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2092  {
2093  typedef IntrinsicTrait<ElementType> IT;
2094 
2095  const size_t M( A.rows() );
2096  const size_t N( A.columns() );
2097 
2098  size_t i( 0UL );
2099 
2100  for( ; (i+8UL) <= M; i+=8UL ) {
2101  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2102  for( size_t j=0UL; j<N; j+=IT::size ) {
2103  const IntrinsicType x1( x.load(j) );
2104  xmm1 = xmm1 + A.load(i ,j) * x1;
2105  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2106  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2107  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2108  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2109  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2110  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2111  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
2112  }
2113  y[i ] = sum( xmm1 ) * scalar;
2114  y[i+1UL] = sum( xmm2 ) * scalar;
2115  y[i+2UL] = sum( xmm3 ) * scalar;
2116  y[i+3UL] = sum( xmm4 ) * scalar;
2117  y[i+4UL] = sum( xmm5 ) * scalar;
2118  y[i+5UL] = sum( xmm6 ) * scalar;
2119  y[i+6UL] = sum( xmm7 ) * scalar;
2120  y[i+7UL] = sum( xmm8 ) * scalar;
2121  }
2122  for( ; (i+4UL) <= M; i+=4UL ) {
2123  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2124  for( size_t j=0UL; j<N; j+=IT::size ) {
2125  const IntrinsicType x1( x.load(j) );
2126  xmm1 = xmm1 + A.load(i ,j) * x1;
2127  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2128  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2129  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2130  }
2131  y[i ] = sum( xmm1 ) * scalar;
2132  y[i+1UL] = sum( xmm2 ) * scalar;
2133  y[i+2UL] = sum( xmm3 ) * scalar;
2134  y[i+3UL] = sum( xmm4 ) * scalar;
2135  }
2136  for( ; (i+3UL) <= M; i+=3UL ) {
2137  IntrinsicType xmm1, xmm2, xmm3;
2138  for( size_t j=0UL; j<N; j+=IT::size ) {
2139  const IntrinsicType x1( x.load(j) );
2140  xmm1 = xmm1 + A.load(i ,j) * x1;
2141  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2142  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2143  }
2144  y[i ] = sum( xmm1 ) * scalar;
2145  y[i+1UL] = sum( xmm2 ) * scalar;
2146  y[i+2UL] = sum( xmm3 ) * scalar;
2147  }
2148  for( ; (i+2UL) <= M; i+=2UL ) {
2149  IntrinsicType xmm1, xmm2;
2150  for( size_t j=0UL; j<N; j+=IT::size ) {
2151  const IntrinsicType x1( x.load(j) );
2152  xmm1 = xmm1 + A.load(i ,j) * x1;
2153  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2154  }
2155  y[i ] = sum( xmm1 ) * scalar;
2156  y[i+1UL] = sum( xmm2 ) * scalar;
2157  }
2158  if( i < M ) {
2159  IntrinsicType xmm1;
2160  for( size_t j=0UL; j<N; j+=IT::size ) {
2161  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2162  }
2163  y[i] = sum( xmm1 ) * scalar;
2164  }
2165  }
2166  //**********************************************************************************************
2167 
2168  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the BLAS assignment kernel. It is selected through EnableIf when
// UseDefaultKernel<VT1,MT1,VT2,ST2> holds (trait defined outside this excerpt) and simply
// forwards all arguments to selectDefaultAssignKernel().
//   y      - target vector receiving the result
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the product
2182  template< typename VT1 // Type of the left-hand side target vector
2183  , typename MT1 // Type of the left-hand side matrix operand
2184  , typename VT2 // Type of the right-hand side vector operand
2185  , typename ST2 > // Type of the scalar value
2186  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2187  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2188  {
2189  selectDefaultAssignKernel( y, A, x, scalar );
2190  }
2191  //**********************************************************************************************
2192 
2193  //**BLAS-based assignment to dense vectors (single precision)***********************************
2194 #if BLAZE_BLAS_MODE
2195 
// Single precision BLAS assignment kernel (y = scalar * A * x), enabled through
// UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The matrix dimensions and A.spacing() are converted to int with boost::numeric_cast
// and the product is delegated to cblas_sgemv.
//   y      - target vector; its data() buffer is overwritten (beta = 0.0F)
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, passed as the BLAS alpha argument
// NOTE(review): listing lines 2217-2219 are not part of this listing.
2208  template< typename VT1 // Type of the left-hand side target vector
2209  , typename MT1 // Type of the left-hand side matrix operand
2210  , typename VT2 // Type of the right-hand side vector operand
2211  , typename ST2 > // Type of the scalar value
2212  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2213  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2214  {
2215  using boost::numeric_cast;
2216 
2220 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
2221  const int M ( numeric_cast<int>( A.rows() ) );
2222  const int N ( numeric_cast<int>( A.columns() ) );
2223  const int lda( numeric_cast<int>( A.spacing() ) );
2224 
      // alpha = scalar, beta = 0.0F, unit stride for both x and y.
2225  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2226  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2227  }
2228 #endif
2229  //**********************************************************************************************
2230 
2231  //**BLAS-based assignment to dense vectors (double precision)***********************************
2232 #if BLAZE_BLAS_MODE
2233 
// Double precision BLAS assignment kernel (y = scalar * A * x), enabled through
// UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The matrix dimensions and A.spacing() are converted to int with boost::numeric_cast
// and the product is delegated to cblas_dgemv.
//   y      - target vector; its data() buffer is overwritten (beta = 0.0)
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, passed as the BLAS alpha argument
// NOTE(review): listing lines 2255-2257 are not part of this listing.
2246  template< typename VT1 // Type of the left-hand side target vector
2247  , typename MT1 // Type of the left-hand side matrix operand
2248  , typename VT2 // Type of the right-hand side vector operand
2249  , typename ST2 > // Type of the scalar value
2250  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2251  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2252  {
2253  using boost::numeric_cast;
2254 
2258 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
2259  const int M ( numeric_cast<int>( A.rows() ) );
2260  const int N ( numeric_cast<int>( A.columns() ) );
2261  const int lda( numeric_cast<int>( A.spacing() ) );
2262 
      // alpha = scalar, beta = 0.0, unit stride for both x and y.
2263  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2264  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2265  }
2266 #endif
2267  //**********************************************************************************************
2268 
2269  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2270 #if BLAZE_BLAS_MODE
2271 
// Single precision complex BLAS assignment kernel (y = scalar * A * x), enabled through
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this excerpt).
// The value_type of all three element types is constrained to float; the product is
// delegated to cblas_cgemv.
//   y      - target vector; its data() buffer is overwritten (beta = 0)
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, converted to complex<float> and passed as alpha
// NOTE(review): listing lines 2293-2295 are not part of this listing.
2284  template< typename VT1 // Type of the left-hand side target vector
2285  , typename MT1 // Type of the left-hand side matrix operand
2286  , typename VT2 // Type of the right-hand side vector operand
2287  , typename ST2 > // Type of the scalar value
2288  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2289  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2290  {
2291  using boost::numeric_cast;
2292 
2296  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2297  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2298  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2299 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
2300  const int M ( numeric_cast<int>( A.rows() ) );
2301  const int N ( numeric_cast<int>( A.columns() ) );
2302  const int lda( numeric_cast<int>( A.spacing() ) );
2303  const complex<float> alpha( scalar );
2304  const complex<float> beta ( 0.0F, 0.0F );
2305 
2306  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2307  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2308  }
2309 #endif
2310  //**********************************************************************************************
2311 
2312  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2313 #if BLAZE_BLAS_MODE
2314 
// Double precision complex BLAS assignment kernel (y = scalar * A * x), enabled through
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this excerpt).
// The value_type of all three element types is constrained to double; the product is
// delegated to cblas_zgemv.
//   y      - target vector; its data() buffer is overwritten (beta = 0)
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, converted to complex<double> and passed as alpha
// NOTE(review): listing lines 2336-2338 are not part of this listing.
2327  template< typename VT1 // Type of the left-hand side target vector
2328  , typename MT1 // Type of the left-hand side matrix operand
2329  , typename VT2 // Type of the right-hand side vector operand
2330  , typename ST2 > // Type of the scalar value
2331  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2332  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2333  {
2334  using boost::numeric_cast;
2335 
2339  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2340  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2341  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2342 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
2343  const int M ( numeric_cast<int>( A.rows() ) );
2344  const int N ( numeric_cast<int>( A.columns() ) );
2345  const int lda( numeric_cast<int>( A.spacing() ) );
2346  const complex<double> alpha( scalar );
2347  const complex<double> beta ( 0.0, 0.0 );
2348 
2349  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2350  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2351  }
2352 #endif
2353  //**********************************************************************************************
2354 
2355  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix-vector product expression to a sparse vector.
// The expression is first materialized into a temporary of type ResultType, which is
// then handed to smpAssign() together with the target.
//   lhs - target sparse vector; must already have the size of rhs (asserted)
//   rhs - expression to be assigned
// NOTE(review): listing lines 2370 and 2372-2374 are not part of this listing.
2367  template< typename VT1 > // Type of the target sparse vector
2368  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2369  {
2371 
2375 
2376  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2377 
2378  const ResultType tmp( rhs );
2379  smpAssign( ~lhs, tmp );
2380  }
2381  //**********************************************************************************************
2382 
2383  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix-vector product expression to a dense vector.
// The operands of the wrapped product (rhs.vector_) are fetched, evaluated into A and x,
// and the work is handed to selectAddAssignKernel() together with rhs.scalar_.
//   lhs - target dense vector; must already have the size of rhs (asserted)
//   rhs - expression to be added
// NOTE(review): listing line 2398 is not part of this listing.
2395  template< typename VT1 > // Type of the target dense vector
2396  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2397  {
2399 
2400  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2401 
2402  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2403  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2404 
      // Nothing to add if the matrix has no rows or no columns.
2405  if( left.rows() == 0UL || left.columns() == 0UL ) {
2406  return;
2407  }
2408 
2409  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2410  RT x( right ); // Evaluation of the right-hand side dense vector operand
2411 
2412  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2413  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2414  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2415  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2416 
2417  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2418  }
2419  //**********************************************************************************************
2420 
2421  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment when UseSMPAssignKernel<VT1,MT1,VT2,ST2>
// does not hold (DisableIf; trait defined outside this excerpt).
// The default kernel is used if the matrix type MT is a computation that is not evaluated
// (evaluateMatrix is false) or if the number of matrix elements is below
// DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel is used.
//   y      - target vector
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
2432  template< typename VT1 // Type of the left-hand side target vector
2433  , typename MT1 // Type of the left-hand side matrix operand
2434  , typename VT2 // Type of the right-hand side vector operand
2435  , typename ST2 > // Type of the scalar value
2436  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2437  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2438  {
2439  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2440  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2441  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2442  else
2443  DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
2444  }
2445  //**********************************************************************************************
2446 
2447  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment when UseSMPAssignKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf; trait defined outside this excerpt).
// The expression A * x * scalar is rebuilt from the evaluated operands and passed to
// smpAddAssign() together with the target vector y.
2458  template< typename VT1 // Type of the left-hand side target vector
2459  , typename MT1 // Type of the left-hand side matrix operand
2460  , typename VT2 // Type of the right-hand side vector operand
2461  , typename ST2 > // Type of the scalar value
2462  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2463  selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2464  {
2465  smpAddAssign( y, A * x * scalar );
2466  }
2467  //**********************************************************************************************
2468 
2469  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf; trait defined
// outside this excerpt).
// The expression A * x * scalar is passed to the addAssign() member of the target y.
2483  template< typename VT1 // Type of the left-hand side target vector
2484  , typename MT1 // Type of the left-hand side matrix operand
2485  , typename VT2 // Type of the right-hand side vector operand
2486  , typename ST2 > // Type of the scalar value
2487  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2488  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2489  {
2490  y.addAssign( A * x * scalar );
2491  }
2492  //**********************************************************************************************
2493 
2494  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for y += scalar * ( A * x ), selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf; trait defined outside this
// excerpt).
// The rows of A are processed in blocks of 8, 4, 3 and 2, followed by at most one remaining
// row. For every row an IntrinsicType accumulator gathers A.load(i,j) * x.load(j) over the
// columns in steps of IT::size; sum() of the accumulator times scalar is added to y[i].
//   y      - target vector, updated element-wise with +=
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor applied to each row sum
// NOTE(review): the accumulators are only default-constructed before they are read, so
// this relies on the IntrinsicType default constructor yielding zeros - confirm.
// NOTE(review): the column loop has no remainder handling; presumably A and x provide
// padded storage up to a multiple of IT::size - confirm.
2508  template< typename VT1 // Type of the left-hand side target vector
2509  , typename MT1 // Type of the left-hand side matrix operand
2510  , typename VT2 // Type of the right-hand side vector operand
2511  , typename ST2 > // Type of the scalar value
2512  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2513  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2514  {
2515  typedef IntrinsicTrait<ElementType> IT;
2516 
2517  const size_t M( A.rows() );
2518  const size_t N( A.columns() );
2519 
2520  size_t i( 0UL );
2521 
      // Blocks of eight rows: each x.load(j) is reused for eight rows of A.
2522  for( ; (i+8UL) <= M; i+=8UL ) {
2523  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2524  for( size_t j=0UL; j<N; j+=IT::size ) {
2525  const IntrinsicType x1( x.load(j) );
2526  xmm1 = xmm1 + A.load(i ,j) * x1;
2527  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2528  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2529  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2530  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2531  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2532  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2533  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
2534  }
2535  y[i ] += sum( xmm1 ) * scalar;
2536  y[i+1UL] += sum( xmm2 ) * scalar;
2537  y[i+2UL] += sum( xmm3 ) * scalar;
2538  y[i+3UL] += sum( xmm4 ) * scalar;
2539  y[i+4UL] += sum( xmm5 ) * scalar;
2540  y[i+5UL] += sum( xmm6 ) * scalar;
2541  y[i+6UL] += sum( xmm7 ) * scalar;
2542  y[i+7UL] += sum( xmm8 ) * scalar;
2543  }
      // Blocks of four rows.
2544  for( ; (i+4UL) <= M; i+=4UL ) {
2545  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2546  for( size_t j=0UL; j<N; j+=IT::size ) {
2547  const IntrinsicType x1( x.load(j) );
2548  xmm1 = xmm1 + A.load(i ,j) * x1;
2549  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2550  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2551  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2552  }
2553  y[i ] += sum( xmm1 ) * scalar;
2554  y[i+1UL] += sum( xmm2 ) * scalar;
2555  y[i+2UL] += sum( xmm3 ) * scalar;
2556  y[i+3UL] += sum( xmm4 ) * scalar;
2557  }
      // Blocks of three rows.
2558  for( ; (i+3UL) <= M; i+=3UL ) {
2559  IntrinsicType xmm1, xmm2, xmm3;
2560  for( size_t j=0UL; j<N; j+=IT::size ) {
2561  const IntrinsicType x1( x.load(j) );
2562  xmm1 = xmm1 + A.load(i ,j) * x1;
2563  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2564  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2565  }
2566  y[i ] += sum( xmm1 ) * scalar;
2567  y[i+1UL] += sum( xmm2 ) * scalar;
2568  y[i+2UL] += sum( xmm3 ) * scalar;
2569  }
      // Blocks of two rows.
2570  for( ; (i+2UL) <= M; i+=2UL ) {
2571  IntrinsicType xmm1, xmm2;
2572  for( size_t j=0UL; j<N; j+=IT::size ) {
2573  const IntrinsicType x1( x.load(j) );
2574  xmm1 = xmm1 + A.load(i ,j) * x1;
2575  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2576  }
2577  y[i ] += sum( xmm1 ) * scalar;
2578  y[i+1UL] += sum( xmm2 ) * scalar;
2579  }
      // At most one row is left at this point.
2580  if( i < M ) {
2581  IntrinsicType xmm1;
2582  for( size_t j=0UL; j<N; j+=IT::size ) {
2583  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2584  }
2585  y[i] += sum( xmm1 ) * scalar;
2586  }
2587  }
2588  //**********************************************************************************************
2589 
2590  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS addition assignment kernel. It is selected through
// EnableIf when UseDefaultKernel<VT1,MT1,VT2,ST2> holds (trait defined outside this
// excerpt) and simply forwards all arguments to selectDefaultAddAssignKernel().
2604  template< typename VT1 // Type of the left-hand side target vector
2605  , typename MT1 // Type of the left-hand side matrix operand
2606  , typename VT2 // Type of the right-hand side vector operand
2607  , typename ST2 > // Type of the scalar value
2608  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2609  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2610  {
2611  selectDefaultAddAssignKernel( y, A, x, scalar );
2612  }
2613  //**********************************************************************************************
2614 
2615  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2616 #if BLAZE_BLAS_MODE
2617 
// Single precision BLAS addition assignment kernel (y += scalar * A * x), enabled through
// UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The matrix dimensions and A.spacing() are converted to int with boost::numeric_cast
// and the product is delegated to cblas_sgemv.
//   y      - target vector; beta = 1.0F keeps its previous content as the addend
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, passed as the BLAS alpha argument
// NOTE(review): listing lines 2639-2641 are not part of this listing.
2630  template< typename VT1 // Type of the left-hand side target vector
2631  , typename MT1 // Type of the left-hand side matrix operand
2632  , typename VT2 // Type of the right-hand side vector operand
2633  , typename ST2 > // Type of the scalar value
2634  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2635  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2636  {
2637  using boost::numeric_cast;
2638 
2642 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
2643  const int M ( numeric_cast<int>( A.rows() ) );
2644  const int N ( numeric_cast<int>( A.columns() ) );
2645  const int lda( numeric_cast<int>( A.spacing() ) );
2646 
      // alpha = scalar, beta = 1.0F, unit stride for both x and y.
2647  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2648  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2649  }
2650 #endif
2651  //**********************************************************************************************
2652 
2653  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2654 #if BLAZE_BLAS_MODE
2655 
// Double precision BLAS addition assignment kernel (y += scalar * A * x), enabled through
// UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The matrix dimensions and A.spacing() are converted to int with boost::numeric_cast
// and the product is delegated to cblas_dgemv.
//   y      - target vector; beta = 1.0 keeps its previous content as the addend
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, passed as the BLAS alpha argument
// NOTE(review): listing lines 2677-2679 are not part of this listing.
2668  template< typename VT1 // Type of the left-hand side target vector
2669  , typename MT1 // Type of the left-hand side matrix operand
2670  , typename VT2 // Type of the right-hand side vector operand
2671  , typename ST2 > // Type of the scalar value
2672  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2673  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2674  {
2675  using boost::numeric_cast;
2676 
2680 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
2681  const int M ( numeric_cast<int>( A.rows() ) );
2682  const int N ( numeric_cast<int>( A.columns() ) );
2683  const int lda( numeric_cast<int>( A.spacing() ) );
2684 
      // alpha = scalar, beta = 1.0, unit stride for both x and y.
2685  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2686  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2687  }
2688 #endif
2689  //**********************************************************************************************
2690 
2691  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2692 #if BLAZE_BLAS_MODE
2693 
// Single precision complex BLAS addition assignment kernel (y += scalar * A * x), enabled
// through UseSinglePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this excerpt).
// The value_type of all three element types is constrained to float; the product is
// delegated to cblas_cgemv.
//   y      - target vector; beta = (1,0) keeps its previous content as the addend
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, converted to complex<float> and passed as alpha
// NOTE(review): listing lines 2715-2717 are not part of this listing.
2706  template< typename VT1 // Type of the left-hand side target vector
2707  , typename MT1 // Type of the left-hand side matrix operand
2708  , typename VT2 // Type of the right-hand side vector operand
2709  , typename ST2 > // Type of the scalar value
2710  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2711  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2712  {
2713  using boost::numeric_cast;
2714 
2718  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2719  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2720  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2721 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
2722  const int M ( numeric_cast<int>( A.rows() ) );
2723  const int N ( numeric_cast<int>( A.columns() ) );
2724  const int lda( numeric_cast<int>( A.spacing() ) );
2725  const complex<float> alpha( scalar );
2726  const complex<float> beta ( 1.0F, 0.0F );
2727 
2728  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2729  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2730  }
2731 #endif
2732  //**********************************************************************************************
2733 
2734  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2735 #if BLAZE_BLAS_MODE
2736 
// Double precision complex BLAS addition assignment kernel (y += scalar * A * x), enabled
// through UseDoublePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this excerpt).
// The value_type of all three element types is constrained to double; the product is
// delegated to cblas_zgemv.
//   y      - target vector; beta = (1,0) keeps its previous content as the addend
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor, converted to complex<double> and passed as alpha
// NOTE(review): listing lines 2758-2760 are not part of this listing.
2749  template< typename VT1 // Type of the left-hand side target vector
2750  , typename MT1 // Type of the left-hand side matrix operand
2751  , typename VT2 // Type of the right-hand side vector operand
2752  , typename ST2 > // Type of the scalar value
2753  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2754  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2755  {
2756  using boost::numeric_cast;
2757 
2761  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2762  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2763  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2764 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
2765  const int M ( numeric_cast<int>( A.rows() ) );
2766  const int N ( numeric_cast<int>( A.columns() ) );
2767  const int lda( numeric_cast<int>( A.spacing() ) );
2768  const complex<double> alpha( scalar );
2769  const complex<double> beta ( 1.0, 0.0 );
2770 
2771  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2772  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2773  }
2774 #endif
2775  //**********************************************************************************************
2776 
2777  //**Addition assignment to sparse vectors*******************************************************
2778  // No special implementation for the addition assignment to sparse vectors.
2779  //**********************************************************************************************
2780 
2781  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix-vector product expression to a dense vector.
// The operands of the wrapped product (rhs.vector_) are fetched, evaluated into A and x,
// and the work is handed to selectSubAssignKernel() together with rhs.scalar_.
//   lhs - target dense vector; must already have the size of rhs (asserted)
//   rhs - expression to be subtracted
// NOTE(review): listing line 2796 is not part of this listing.
2793  template< typename VT1 > // Type of the target dense vector
2794  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2795  {
2797 
2798  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2799 
2800  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2801  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2802 
      // Nothing to subtract if the matrix has no rows or no columns.
2803  if( left.rows() == 0UL || left.columns() == 0UL ) {
2804  return;
2805  }
2806 
2807  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2808  RT x( right ); // Evaluation of the right-hand side dense vector operand
2809 
2810  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2811  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2812  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2813  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2814 
2815  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2816  }
2817  //**********************************************************************************************
2818 
2819  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment when UseSMPAssignKernel<VT1,MT1,VT2,ST2>
// does not hold (DisableIf; trait defined outside this excerpt).
// The default kernel is used if the matrix type MT is a computation that is not evaluated
// (evaluateMatrix is false) or if the number of matrix elements is below
// DMATDVECMULT_THRESHOLD; otherwise the BLAS kernel is used.
//   y      - target vector
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
2830  template< typename VT1 // Type of the left-hand side target vector
2831  , typename MT1 // Type of the left-hand side matrix operand
2832  , typename VT2 // Type of the right-hand side vector operand
2833  , typename ST2 > // Type of the scalar value
2834  static inline typename DisableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2835  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2836  {
2837  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2838  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
2839  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
2840  else
2841  DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
2842  }
2843  //**********************************************************************************************
2844 
2845  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment when UseSMPAssignKernel<VT1,MT1,VT2,ST2>
// holds (EnableIf; trait defined outside this excerpt).
// The expression A * x * scalar is rebuilt from the evaluated operands and passed to
// smpSubAssign() together with the target vector y.
2856  template< typename VT1 // Type of the left-hand side target vector
2857  , typename MT1 // Type of the left-hand side matrix operand
2858  , typename VT2 // Type of the right-hand side vector operand
2859  , typename ST2 > // Type of the scalar value
2860  static inline typename EnableIf< UseSMPAssignKernel<VT1,MT1,VT2,ST2> >::Type
2861  selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2862  {
2863  smpSubAssign( y, A * x * scalar );
2864  }
2865  //**********************************************************************************************
2866 
2867  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf; trait defined
// outside this excerpt).
// The expression A * x * scalar is passed to the subAssign() member of the target y.
2881  template< typename VT1 // Type of the left-hand side target vector
2882  , typename MT1 // Type of the left-hand side matrix operand
2883  , typename VT2 // Type of the right-hand side vector operand
2884  , typename ST2 > // Type of the scalar value
2885  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2886  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2887  {
2888  y.subAssign( A * x * scalar );
2889  }
2890  //**********************************************************************************************
2891 
2892  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for y -= scalar * ( A * x ), selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf; trait defined outside this
// excerpt).
// The rows of A are processed in blocks of 8, 4, 3 and 2, followed by at most one remaining
// row. For every row an IntrinsicType accumulator gathers A.load(i,j) * x.load(j) over the
// columns in steps of IT::size; sum() of the accumulator times scalar is subtracted from y[i].
//   y      - target vector, updated element-wise with -=
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor applied to each row sum
// NOTE(review): the accumulators are only default-constructed before they are read, so
// this relies on the IntrinsicType default constructor yielding zeros - confirm.
// NOTE(review): the column loop has no remainder handling; presumably A and x provide
// padded storage up to a multiple of IT::size - confirm.
2906  template< typename VT1 // Type of the left-hand side target vector
2907  , typename MT1 // Type of the left-hand side matrix operand
2908  , typename VT2 // Type of the right-hand side vector operand
2909  , typename ST2 > // Type of the scalar value
2910  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2911  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2912  {
2913  typedef IntrinsicTrait<ElementType> IT;
2914 
2915  const size_t M( A.rows() );
2916  const size_t N( A.columns() );
2917 
2918  size_t i( 0UL );
2919 
      // Blocks of eight rows: each x.load(j) is reused for eight rows of A.
2920  for( ; (i+8UL) <= M; i+=8UL ) {
2921  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2922  for( size_t j=0UL; j<N; j+=IT::size ) {
2923  const IntrinsicType x1( x.load(j) );
2924  xmm1 = xmm1 + A.load(i ,j) * x1;
2925  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2926  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2927  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2928  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2929  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2930  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2931  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
2932  }
2933  y[i ] -= sum( xmm1 ) * scalar;
2934  y[i+1UL] -= sum( xmm2 ) * scalar;
2935  y[i+2UL] -= sum( xmm3 ) * scalar;
2936  y[i+3UL] -= sum( xmm4 ) * scalar;
2937  y[i+4UL] -= sum( xmm5 ) * scalar;
2938  y[i+5UL] -= sum( xmm6 ) * scalar;
2939  y[i+6UL] -= sum( xmm7 ) * scalar;
2940  y[i+7UL] -= sum( xmm8 ) * scalar;
2941  }
      // Blocks of four rows.
2942  for( ; (i+4UL) <= M; i+=4UL ) {
2943  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2944  for( size_t j=0UL; j<N; j+=IT::size ) {
2945  const IntrinsicType x1( x.load(j) );
2946  xmm1 = xmm1 + A.load(i ,j) * x1;
2947  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2948  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2949  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2950  }
2951  y[i ] -= sum( xmm1 ) * scalar;
2952  y[i+1UL] -= sum( xmm2 ) * scalar;
2953  y[i+2UL] -= sum( xmm3 ) * scalar;
2954  y[i+3UL] -= sum( xmm4 ) * scalar;
2955  }
      // Blocks of three rows.
2956  for( ; (i+3UL) <= M; i+=3UL ) {
2957  IntrinsicType xmm1, xmm2, xmm3;
2958  for( size_t j=0UL; j<N; j+=IT::size ) {
2959  const IntrinsicType x1( x.load(j) );
2960  xmm1 = xmm1 + A.load(i ,j) * x1;
2961  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2962  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2963  }
2964  y[i ] -= sum( xmm1 ) * scalar;
2965  y[i+1UL] -= sum( xmm2 ) * scalar;
2966  y[i+2UL] -= sum( xmm3 ) * scalar;
2967  }
      // Blocks of two rows.
2968  for( ; (i+2UL) <= M; i+=2UL ) {
2969  IntrinsicType xmm1, xmm2;
2970  for( size_t j=0UL; j<N; j+=IT::size ) {
2971  const IntrinsicType x1( x.load(j) );
2972  xmm1 = xmm1 + A.load(i ,j) * x1;
2973  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2974  }
2975  y[i ] -= sum( xmm1 ) * scalar;
2976  y[i+1UL] -= sum( xmm2 ) * scalar;
2977  }
      // At most one row is left at this point.
2978  if( i < M ) {
2979  IntrinsicType xmm1;
2980  for( size_t j=0UL; j<N; j+=IT::size ) {
2981  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2982  }
2983  y[i] -= sum( xmm1 ) * scalar;
2984  }
2985  }
2986  //**********************************************************************************************
2987 
2988  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback overload of the BLAS subtraction assignment kernel. It is selected through
// EnableIf when UseDefaultKernel<VT1,MT1,VT2,ST2> holds (trait defined outside this
// excerpt) and simply forwards all arguments to selectDefaultSubAssignKernel().
3002  template< typename VT1 // Type of the left-hand side target vector
3003  , typename MT1 // Type of the left-hand side matrix operand
3004  , typename VT2 // Type of the right-hand side vector operand
3005  , typename ST2 > // Type of the scalar value
3006  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3007  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3008  {
3009  selectDefaultSubAssignKernel( y, A, x, scalar );
3010  }
3011  //**********************************************************************************************
3012 
3013  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
3014 #if BLAZE_BLAS_MODE
3015 
// Single precision BLAS subtraction assignment kernel (y -= scalar * A * x), enabled
// through UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The subtraction is expressed by passing the negated scalar as alpha while beta = 1.0F
// keeps the previous content of y; the product is delegated to cblas_sgemv.
//   y      - target vector
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor; -scalar is passed as the BLAS alpha argument
// NOTE(review): listing lines 3037-3039 are not part of this listing.
3028  template< typename VT1 // Type of the left-hand side target vector
3029  , typename MT1 // Type of the left-hand side matrix operand
3030  , typename VT2 // Type of the right-hand side vector operand
3031  , typename ST2 > // Type of the scalar value
3032  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3033  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3034  {
3035  using boost::numeric_cast;
3036 
3040 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
3041  const int M ( numeric_cast<int>( A.rows() ) );
3042  const int N ( numeric_cast<int>( A.columns() ) );
3043  const int lda( numeric_cast<int>( A.spacing() ) );
3044 
      // alpha = -scalar, beta = 1.0F, unit stride for both x and y.
3045  cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
3046  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3047  }
3048 #endif
3049  //**********************************************************************************************
3050 
3051  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
3052 #if BLAZE_BLAS_MODE
3053 
// Double precision BLAS subtraction assignment kernel (y -= scalar * A * x), enabled
// through UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> (trait defined outside this excerpt).
// The subtraction is expressed by passing the negated scalar as alpha while beta = 1.0
// keeps the previous content of y; the product is delegated to cblas_dgemv.
//   y      - target vector
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor; -scalar is passed as the BLAS alpha argument
// NOTE(review): listing lines 3075-3077 are not part of this listing.
3066  template< typename VT1 // Type of the left-hand side target vector
3067  , typename MT1 // Type of the left-hand side matrix operand
3068  , typename VT2 // Type of the right-hand side vector operand
3069  , typename ST2 > // Type of the scalar value
3070  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3071  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3072  {
3073  using boost::numeric_cast;
3074 
3078 
      // BLAS takes int arguments: M rows, N columns, lda = A.spacing() as leading dimension.
3079  const int M ( numeric_cast<int>( A.rows() ) );
3080  const int N ( numeric_cast<int>( A.columns() ) );
3081  const int lda( numeric_cast<int>( A.spacing() ) );
3082 
      // alpha = -scalar, beta = 1.0, unit stride for both x and y.
3083  cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
3084  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3085  }
3086 #endif
3087  //**********************************************************************************************
3088 
3089  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
3090 #if BLAZE_BLAS_MODE
3091 
// Single precision complex BLAS subtraction assignment kernel (y -= scalar * A * x),
// enabled through UseSinglePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this
// excerpt). The value_type of all three element types is constrained to float.
// The subtraction is expressed by using alpha = -scalar while beta = (1,0) keeps the
// previous content of y; the product is delegated to cblas_cgemv.
//   y      - target vector
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor; its negation is converted to complex<float>
// NOTE(review): listing lines 3113-3115 are not part of this listing.
3104  template< typename VT1 // Type of the left-hand side target vector
3105  , typename MT1 // Type of the left-hand side matrix operand
3106  , typename VT2 // Type of the right-hand side vector operand
3107  , typename ST2 > // Type of the scalar value
3108  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3109  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3110  {
3111  using boost::numeric_cast;
3112 
3116  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3117  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3118  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3119 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
3120  const int M ( numeric_cast<int>( A.rows() ) );
3121  const int N ( numeric_cast<int>( A.columns() ) );
3122  const int lda( numeric_cast<int>( A.spacing() ) );
3123  const complex<float> alpha( -scalar );
3124  const complex<float> beta ( 1.0F, 0.0F );
3125 
3126  cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
3127  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3128  }
3129 #endif
3130  //**********************************************************************************************
3131 
3132  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
3133 #if BLAZE_BLAS_MODE
3134 
// Double precision complex BLAS subtraction assignment kernel (y -= scalar * A * x),
// enabled through UseDoublePrecisionComplexKernel<VT1,MT1,VT2> (trait defined outside this
// excerpt). The value_type of all three element types is constrained to double.
// The subtraction is expressed by using alpha = -scalar while beta = (1,0) keeps the
// previous content of y; the product is delegated to cblas_zgemv.
//   y      - target vector
//   A      - matrix operand, passed as row-major and non-transposed
//   x      - vector operand
//   scalar - scaling factor; its negation is converted to complex<double>
// NOTE(review): listing lines 3156-3158 are not part of this listing.
3147  template< typename VT1 // Type of the left-hand side target vector
3148  , typename MT1 // Type of the left-hand side matrix operand
3149  , typename VT2 // Type of the right-hand side vector operand
3150  , typename ST2 > // Type of the scalar value
3151  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3152  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3153  {
3154  using boost::numeric_cast;
3155 
3159  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3160  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3161  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3162 
      // BLAS takes int dimensions; alpha and beta are passed by address for complex types.
3163  const int M ( numeric_cast<int>( A.rows() ) );
3164  const int N ( numeric_cast<int>( A.columns() ) );
3165  const int lda( numeric_cast<int>( A.spacing() ) );
3166  const complex<double> alpha( -scalar );
3167  const complex<double> beta ( 1.0, 0.0 );
3168 
3169  cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
3170  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3171  }
3172 #endif
3173  //**********************************************************************************************
3174 
3175  //**Subtraction assignment to sparse vectors****************************************************
3176  // No special implementation for the subtraction assignment to sparse vectors.
3177  //**********************************************************************************************
3178 
3179  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled matrix-vector product expression to a dense
// vector. The expression is first materialized into a temporary of type ResultType, which
// is then passed to multAssign() together with the target.
//   lhs - target dense vector; must already have the size of rhs (asserted)
//   rhs - expression the target is multiplied with
// NOTE(review): the other assignment paths in this class forward to smp* functions
// (smpAssign, smpAddAssign, smpSubAssign) while this one calls the plain multAssign();
// confirm whether that is intended.
// NOTE(review): listing lines 3194 and 3196-3198 are not part of this listing.
3191  template< typename VT1 > // Type of the target dense vector
3192  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3193  {
3195 
3199 
3200  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3201 
3202  const ResultType tmp( rhs );
3203  multAssign( ~lhs, tmp );
3204  }
3205  //**********************************************************************************************
3206 
3207  //**Multiplication assignment to sparse vectors*************************************************
3208  // No special implementation for the multiplication assignment to sparse vectors.
3209  //**********************************************************************************************
3210 
3211  //**Compile time checks*************************************************************************
3220  //**********************************************************************************************
3221 };
3223 //*************************************************************************************************
3224 
3225 
3226 
3227 
3228 //=================================================================================================
3229 //
3230 // GLOBAL BINARY ARITHMETIC OPERATORS
3231 //
3232 //=================================================================================================
3233 
3234 //*************************************************************************************************
// Multiplication operator creating a DMatDVecMultExpr from a matrix and a vector operand.
// The overload is disabled when T1 is a matrix/matrix multiplication expression
// (DisableIf< IsMatMatMultExpr<T1>, ... >).
// Returns the expression object DMatDVecMultExpr<T1,T2>( ~mat, ~vec ).
// Throws std::invalid_argument if the number of matrix columns differs from the vector size.
// NOTE(review): the declarator line naming the function and its parameters (listing line
// 3267) is not part of this listing; mat and vec are the names used in the body.
3264 template< typename T1 // Type of the left-hand side dense matrix
3265  , typename T2 > // Type of the right-hand side dense vector
3266 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
3268 {
3270 
3271  if( (~mat).columns() != (~vec).size() )
3272  throw std::invalid_argument( "Matrix and vector sizes do not match" );
3273 
3274  return DMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3275 }
3276 //*************************************************************************************************
3277 
3278 
3279 
3280 
3281 //=================================================================================================
3282 //
3283 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3284 //
3285 //=================================================================================================
3286 
3287 //*************************************************************************************************
// Restructuring multiplication operator, enabled only when T1 is a matrix/matrix
// multiplication expression (EnableIf< IsMatMatMultExpr<T1>, ... >).
// Instead of multiplying the matrix product with the vector, the right-hand side matrix
// operand is multiplied with the vector first and the left-hand side matrix operand is
// applied to that result, i.e. (A*B)*v is rewritten as A*(B*v).
// NOTE(review): the declarator line naming the function and its parameters (listing line
// 3304) is not part of this listing; mat and vec are the names used in the body.
3300 template< typename T1 // Type of the left-hand side dense matrix
3301  , bool SO // Storage order of the left-hand side dense matrix
3302  , typename T2 > // Type of the right-hand side dense vector
3303 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
3305 {
3307 
3308  return (~mat).leftOperand() * ( (~mat).rightOperand() * vec );
3309 }
3310 //*************************************************************************************************
3311 
3312 
3313 
3314 
3315 //=================================================================================================
3316 //
3317 // EXPRESSION TRAIT SPECIALIZATIONS
3318 //
3319 //=================================================================================================
3320 
3321 //*************************************************************************************************
// Specialization of SubvectorExprTrait for DMatDVecMultExpr.
// The nested Type is the MultExprTrait result for the submatrix expression type of the
// const matrix operand (SubmatrixExprTrait<const MT,AF>::Type) and the unchanged vector
// operand type VT.
3323 template< typename MT, typename VT, bool AF >
3324 struct SubvectorExprTrait< DMatDVecMultExpr<MT,VT>, AF >
3325 {
3326  public:
3327  //**********************************************************************************************
3328  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
3329  //**********************************************************************************************
3330 };
3332 //*************************************************************************************************
3333 
3334 } // namespace blaze
3335 
3336 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:254
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:342
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4075
DMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the DMatDVecMultExpr class.
Definition: DMatDVecMultExpr.h:280
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
Expression object for dense matrix-dense vector multiplications. The DMatDVecMultExpr class represents...
Definition: DMatDVecMultExpr.h:105
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:111
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDVecMultExpr.h:376
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDVecMultExpr.h:243
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
Header file for the DenseVector base class.
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
DMatDVecMultExpr< MT, VT > This
Type of this DMatDVecMultExpr instance.
Definition: DMatDVecMultExpr.h:242
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
size_t size() const
Returns the current size/dimension of the vector.
Definition: DMatDVecMultExpr.h:322
const size_t end_
End of the unrolled calculation loop.
Definition: DMatDVecMultExpr.h:398
Constraint on the data type.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:116
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:113
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:251
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:260
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the dense vector SMP implementation.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:115
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:332
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDVecMultExpr.h:386
Constraint on the data type.
Base class for all matrix/vector multiplication expression templates. The MatVecMultExpr class serves ...
Definition: MatVecMultExpr.h:66
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:257
Constraints on the storage order of matrix types.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDVecMultExpr.h:245
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDVecMultExpr.h:396
Header file for the EnableIf class template.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: DMatDVecMultExpr.h:397
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDVecMultExpr.h:354
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:112
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDVecMultExpr.h:248
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:251
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDVecMultExpr.h:366
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDVecMultExpr.h:244
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the sparse vector SMP implementation.
const size_t SMP_DMATDVECMULT_THRESHOLD
SMP row-major dense matrix/dense vector multiplication threshold. This threshold represents the system...
Definition: Thresholds.h:139
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDVecMultExpr.h:246
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDVecMultExpr.h:247
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: DMatDVecMultExpr.h:295
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
const size_t DMATDVECMULT_THRESHOLD
Row-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:57
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:114
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.