All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
67 #include <blaze/system/BLAS.h>
69 #include <blaze/util/Assert.h>
70 #include <blaze/util/Complex.h>
76 #include <blaze/util/DisableIf.h>
77 #include <blaze/util/EnableIf.h>
79 #include <blaze/util/SelectType.h>
80 #include <blaze/util/Types.h>
86 
87 
88 namespace blaze {
89 
90 //=================================================================================================
91 //
92 // CLASS TDVECTDMATMULTEXPR
93 //
94 //=================================================================================================
95 
96 //*************************************************************************************************
103 template< typename VT // Type of the left-hand side dense vector
104  , typename MT > // Type of the right-hand side dense matrix
105 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
106  , private TVecMatMultExpr
107  , private Computation
108 {
109  private:
110  //**Type definitions****************************************************************************
111  typedef typename VT::ResultType VRT;
112  typedef typename MT::ResultType MRT;
113  typedef typename VRT::ElementType VET;
114  typedef typename MRT::ElementType MET;
115  typedef typename VT::CompositeType VCT;
116  typedef typename MT::CompositeType MCT;
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
121  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
126  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
132 
135  template< typename T1, typename T2, typename T3 >
136  struct UseSMPAssignKernel {
137  enum { value = evaluateVector || evaluateMatrix };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
151  IsFloat<typename T1::ElementType>::value &&
152  IsFloat<typename T2::ElementType>::value &&
153  IsFloat<typename T3::ElementType>::value };
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167  IsDouble<typename T1::ElementType>::value &&
168  IsDouble<typename T2::ElementType>::value &&
169  IsDouble<typename T3::ElementType>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184  IsSame<typename T1::ElementType,Type>::value &&
185  IsSame<typename T2::ElementType,Type>::value &&
186  IsSame<typename T3::ElementType,Type>::value };
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201  IsSame<typename T1::ElementType,Type>::value &&
202  IsSame<typename T2::ElementType,Type>::value &&
203  IsSame<typename T3::ElementType,Type>::value };
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234  IntrinsicTrait<typename T1::ElementType>::addition &&
235  IntrinsicTrait<typename T1::ElementType>::multiplication };
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
// Type returned by the subscript operator (a const element).
247  typedef const ElementType ReturnType;
// Composite type of this expression (a const result object).
248  typedef const ResultType CompositeType;
249 
// Storage/return type of the vector operand, chosen via IsExpression<VT>: either 'const VT'
// or 'const VT&'. NOTE(review): presumably SelectType picks the by-value alternative when the
// condition is true - confirm against SelectType.
251  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
252 
// Storage/return type of the matrix operand, chosen via IsExpression<MT> in the same way.
254  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
265  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
269 
271  enum { smpAssignable = !evaluateVector && !evaluateMatrix };
272  //**********************************************************************************************
273 
274  //**Constructor*********************************************************************************
280  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
281  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
282  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
283  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
284  {
285  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
286  }
287  //**********************************************************************************************
288 
289  //**Subscript operator**************************************************************************
295  inline ReturnType operator[]( size_t index ) const {
296  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
297 
298  ElementType res;
299 
300  if( mat_.rows() != 0UL ) {
301  res = vec_[0UL] * mat_(0UL,index);
302  for( size_t j=1UL; j<end_; j+=2UL ) {
303  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
304  }
305  if( end_ < mat_.rows() ) {
306  res += vec_[end_] * mat_(end_,index);
307  }
308  }
309  else {
310  reset( res );
311  }
312 
313  return res;
314  }
315  //**********************************************************************************************
316 
317  //**Size function*******************************************************************************
// Returns the size of the expression, which is the number of columns of the matrix operand.
322  inline size_t size() const {
323  return mat_.columns();
324  }
325  //**********************************************************************************************
326 
327  //**Left operand access*************************************************************************
// Returns the stored left-hand side dense vector operand (vec_).
332  inline LeftOperand leftOperand() const {
333  return vec_;
334  }
335  //**********************************************************************************************
336 
337  //**Right operand access************************************************************************
// Returns the stored right-hand side dense matrix operand (mat_).
342  inline RightOperand rightOperand() const {
343  return mat_;
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
353  template< typename T >
354  inline bool canAlias( const T* alias ) const {
355  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
365  template< typename T >
366  inline bool isAliased( const T* alias ) const {
367  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
368  }
369  //**********************************************************************************************
370 
371  //**********************************************************************************************
376  inline bool isAligned() const {
377  return vec_.isAligned() && mat_.isAligned();
378  }
379  //**********************************************************************************************
380 
381  //**********************************************************************************************
386  inline bool canSMPAssign() const {
387  return ( !BLAZE_BLAS_IS_PARALLEL ||
388  ( IsComputation<MT>::value && !evaluateMatrix ) ||
389  ( mat_.rows() * mat_.columns() < TDVECTDMATMULT_THRESHOLD ) ) &&
391  }
392  //**********************************************************************************************
393 
394  private:
395  //**Member variables****************************************************************************
// End of the unrolled (two-at-a-time) calculation loop in operator[]; set once by the constructor.
398  const size_t end_;
399  //**********************************************************************************************
400 
401  //**Assignment to dense vectors*****************************************************************
// Assignment of a transpose dense vector / transpose dense matrix multiplication to a
// transpose dense vector ( y = x * A ).
//
// lhs: target dense vector; its size must equal rhs.size() (asserted).
// rhs: multiplication expression to be assigned.
414  template< typename VT1 > // Type of the target dense vector
415  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
416  {
418 
419  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
420 
// No rows: there are no terms to sum, so the target is reset.
421  if( rhs.mat_.rows() == 0UL ) {
422  reset( ~lhs );
423  return;
424  }
// No columns: the result has no elements, so there is nothing to write.
425  else if( rhs.mat_.columns() == 0UL ) {
426  return;
427  }
428 
// NOTE(review): LT and RT are declared outside the visible part of this listing; presumably
// they select between a reference to and an evaluated copy of the operand - confirm.
429  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
430  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
431 
432  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
433  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
434  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
435  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
436 
// Dispatch to the SMP, default or BLAS kernel selected at compile time.
437  TDVecTDMatMultExpr::selectAssignKernel( ~lhs, x, A );
438  }
440  //**********************************************************************************************
441 
442  //**Assignment to dense vectors (kernel selection)**********************************************
453  template< typename VT1 // Type of the left-hand side target vector
454  , typename VT2 // Type of the left-hand side vector operand
455  , typename MT1 > // Type of the right-hand side matrix operand
456  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
457  selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
461  TDVecTDMatMultExpr::selectDefaultAssignKernel( y, x, A );
462  else
463  TDVecTDMatMultExpr::selectBlasAssignKernel( y, x, A );
464  }
466  //**********************************************************************************************
467 
468  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment y = x * A when UseSMPAssignKernel applies:
// the product of the operands is handed to smpAssign().
479  template< typename VT1 // Type of the left-hand side target vector
480  , typename VT2 // Type of the left-hand side vector operand
481  , typename MT1 > // Type of the right-hand side matrix operand
482  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
483  selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
484  {
485  smpAssign( y, x * A );
486  }
488  //**********************************************************************************************
489 
490  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) assignment kernel for y = x * A, used when
// UseVectorizedDefaultKernel does not apply: the product is assigned through y.assign().
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename VT2 // Type of the left-hand side vector operand
506  , typename MT1 > // Type of the right-hand side matrix operand
507  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
508  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
509  {
510  y.assign( x * A );
511  }
513  //**********************************************************************************************
514 
515  //**Vectorized default assignment to dense vectors**********************************************
529  template< typename VT1 // Type of the left-hand side target vector
530  , typename VT2 // Type of the left-hand side vector operand
531  , typename MT1 > // Type of the right-hand side matrix operand
532  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
533  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
534  {
535  typedef IntrinsicTrait<ElementType> IT;
536 
537  const size_t M( A.rows() );
538  const size_t N( A.columns() );
539 
540  size_t j( 0UL );
541 
542  for( ; (j+8UL) <= N; j+=8UL ) {
543  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
544  for( size_t i=0UL; i<M; i+=IT::size ) {
545  const IntrinsicType x1( x.load(i) );
546  xmm1 = xmm1 + x1 * A.load(i,j );
547  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
548  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
549  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
550  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
551  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
552  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
553  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
554  }
555  y[j ] = sum( xmm1 );
556  y[j+1UL] = sum( xmm2 );
557  y[j+2UL] = sum( xmm3 );
558  y[j+3UL] = sum( xmm4 );
559  y[j+4UL] = sum( xmm5 );
560  y[j+5UL] = sum( xmm6 );
561  y[j+6UL] = sum( xmm7 );
562  y[j+7UL] = sum( xmm8 );
563  }
564  for( ; (j+4UL) <= N; j+=4UL ) {
565  IntrinsicType xmm1, xmm2, xmm3, xmm4;
566  for( size_t i=0UL; i<M; i+=IT::size ) {
567  const IntrinsicType x1( x.load(i) );
568  xmm1 = xmm1 + x1 * A.load(i,j );
569  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
570  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
571  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
572  }
573  y[j ] = sum( xmm1 );
574  y[j+1UL] = sum( xmm2 );
575  y[j+2UL] = sum( xmm3 );
576  y[j+3UL] = sum( xmm4 );
577  }
578  for( ; (j+3UL) <= N; j+=3UL ) {
579  IntrinsicType xmm1, xmm2, xmm3;
580  for( size_t i=0UL; i<M; i+=IT::size ) {
581  const IntrinsicType x1( x.load(i) );
582  xmm1 = xmm1 + x1 * A.load(i,j );
583  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
584  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
585  }
586  y[j ] = sum( xmm1 );
587  y[j+1UL] = sum( xmm2 );
588  y[j+2UL] = sum( xmm3 );
589  }
590  for( ; (j+2UL) <= N; j+=2UL ) {
591  IntrinsicType xmm1, xmm2;
592  for( size_t i=0UL; i<M; i+=IT::size ) {
593  const IntrinsicType x1( x.load(i) );
594  xmm1 = xmm1 + x1 * A.load(i,j );
595  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
596  }
597  y[j ] = sum( xmm1 );
598  y[j+1UL] = sum( xmm2 );
599  }
600  if( j < N ) {
601  IntrinsicType xmm1;
602  for( size_t i=0UL; i<M; i+=IT::size ) {
603  xmm1 = xmm1 + A.load(i,j) * x.load(i);
604  }
605  y[j] = sum( xmm1 );
606  }
607  }
609  //**********************************************************************************************
610 
611  //**BLAS-based assignment to dense vectors (default)********************************************
625  template< typename VT1 // Type of the left-hand side target vector
626  , typename VT2 // Type of the left-hand side vector operand
627  , typename MT1 > // Type of the right-hand side matrix operand
628  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
629  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
630  {
631  selectDefaultAssignKernel( y, x, A );
632  }
634  //**********************************************************************************************
635 
636  //**BLAS-based assignment to dense vectors (single precision)***********************************
637 #if BLAZE_BLAS_MODE
638 
// BLAS-based assignment kernel for single precision operands ( y = x * A ).
// The product is evaluated by cblas_sgemv as y = 1.0F * A^T * x + 0.0F * y, with A passed
// as a column-major matrix and the transpose flag set.
651  template< typename VT1 // Type of the left-hand side target vector
652  , typename VT2 // Type of the left-hand side vector operand
653  , typename MT1 > // Type of the right-hand side matrix operand
654  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
655  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
656  {
657  using boost::numeric_cast;
658 
662 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
663  const int M ( numeric_cast<int>( A.rows() ) );
664  const int N ( numeric_cast<int>( A.columns() ) );
// A.spacing() is passed as the leading dimension of A.
665  const int lda( numeric_cast<int>( A.spacing() ) );
666 
// beta = 0.0F: the previous contents of y do not contribute to the result.
667  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
668  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
669  }
671 #endif
672  //**********************************************************************************************
673 
674  //**BLAS-based assignment to dense vectors (double precision)***********************************
675 #if BLAZE_BLAS_MODE
676 
// BLAS-based assignment kernel for double precision operands ( y = x * A ).
// The product is evaluated by cblas_dgemv as y = 1.0 * A^T * x + 0.0 * y, with A passed
// as a column-major matrix and the transpose flag set.
689  template< typename VT1 // Type of the left-hand side target vector
690  , typename VT2 // Type of the left-hand side vector operand
691  , typename MT1 > // Type of the right-hand side matrix operand
692  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
693  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
694  {
695  using boost::numeric_cast;
696 
700 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
701  const int M ( numeric_cast<int>( A.rows() ) );
702  const int N ( numeric_cast<int>( A.columns() ) );
// A.spacing() is passed as the leading dimension of A.
703  const int lda( numeric_cast<int>( A.spacing() ) );
704 
// beta = 0.0: the previous contents of y do not contribute to the result.
705  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
706  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
707  }
709 #endif
710  //**********************************************************************************************
711 
712  //**BLAS-based assignment to dense vectors (single precision complex)***************************
713 #if BLAZE_BLAS_MODE
714 
// BLAS-based assignment kernel for single precision complex operands ( y = x * A ).
// The product is evaluated by cblas_cgemv as y = alpha * A^T * x + beta * y with
// alpha = (1,0) and beta = (0,0); CblasTrans is the plain (non-conjugating) transpose flag.
727  template< typename VT1 // Type of the left-hand side target vector
728  , typename VT2 // Type of the left-hand side vector operand
729  , typename MT1 > // Type of the right-hand side matrix operand
730  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
731  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
732  {
733  using boost::numeric_cast;
734 
// Compile-time checks on the underlying value type of the complex element types.
738  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
739  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
740  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
741 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
742  const int M ( numeric_cast<int>( A.rows() ) );
743  const int N ( numeric_cast<int>( A.columns() ) );
744  const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors are passed to cblas_cgemv by address.
745  const complex<float> alpha( 1.0F, 0.0F );
746  const complex<float> beta ( 0.0F, 0.0F );
747 
748  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
749  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
750  }
752 #endif
753  //**********************************************************************************************
754 
755  //**BLAS-based assignment to dense vectors (double precision complex)***************************
756 #if BLAZE_BLAS_MODE
757 
// BLAS-based assignment kernel for double precision complex operands ( y = x * A ).
// The product is evaluated by cblas_zgemv as y = alpha * A^T * x + beta * y with
// alpha = (1,0) and beta = (0,0); CblasTrans is the plain (non-conjugating) transpose flag.
770  template< typename VT1 // Type of the left-hand side target vector
771  , typename VT2 // Type of the left-hand side vector operand
772  , typename MT1 > // Type of the right-hand side matrix operand
773  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
774  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
775  {
776  using boost::numeric_cast;
777 
// Compile-time checks on the underlying value type of the complex element types.
781  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
782  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
783  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
784 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
785  const int M ( numeric_cast<int>( A.rows() ) );
786  const int N ( numeric_cast<int>( A.columns() ) );
787  const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors are passed to cblas_zgemv by address.
788  const complex<double> alpha( 1.0, 0.0 );
789  const complex<double> beta ( 0.0, 0.0 );
790 
791  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
792  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
793  }
795 #endif
796  //**********************************************************************************************
797 
798  //**Assignment to sparse vectors****************************************************************
// Assignment of the multiplication expression to a transpose sparse vector.
//
// lhs: target sparse vector; its size must equal rhs.size() (asserted).
// rhs: multiplication expression to be assigned.
811  template< typename VT1 > // Type of the target sparse vector
812  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
813  {
815 
819 
820  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
821 
// The expression is first evaluated into a temporary of type ResultType, which is then
// assigned to the sparse target.
822  const ResultType tmp( rhs );
823  smpAssign( ~lhs, tmp );
824  }
826  //**********************************************************************************************
827 
828  //**Addition assignment to dense vectors********************************************************
// Addition assignment of a transpose dense vector / transpose dense matrix multiplication
// to a transpose dense vector ( y += x * A ).
//
// lhs: target dense vector; its size must equal rhs.size() (asserted).
// rhs: multiplication expression to be added.
841  template< typename VT1 > // Type of the target dense vector
842  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
843  {
845 
846  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
847 
// An empty matrix contributes nothing, so the target is left untouched.
848  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
849  return;
850  }
851 
// NOTE(review): LT and RT are declared outside the visible part of this listing; presumably
// they select between a reference to and an evaluated copy of the operand - confirm.
852  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
853  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
854 
855  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
856  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
857  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
858  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
859 
// Dispatch to the SMP, default or BLAS kernel selected at compile time.
860  TDVecTDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
861  }
863  //**********************************************************************************************
864 
865  //**Addition assignment to dense vectors (kernel selection)*************************************
876  template< typename VT1 // Type of the left-hand side target vector
877  , typename VT2 // Type of the left-hand side vector operand
878  , typename MT1 > // Type of the right-hand side matrix operand
879  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
880  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
881  {
882  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
883  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
884  TDVecTDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
885  else
886  TDVecTDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
887  }
889  //**********************************************************************************************
890 
891  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment y += x * A when UseSMPAssignKernel applies:
// the product of the operands is handed to smpAddAssign().
902  template< typename VT1 // Type of the left-hand side target vector
903  , typename VT2 // Type of the left-hand side vector operand
904  , typename MT1 > // Type of the right-hand side matrix operand
905  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
906  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
907  {
908  smpAddAssign( y, x * A );
909  }
911  //**********************************************************************************************
912 
913  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel for y += x * A, used when
// UseVectorizedDefaultKernel does not apply: the product is added through y.addAssign().
927  template< typename VT1 // Type of the left-hand side target vector
928  , typename VT2 // Type of the left-hand side vector operand
929  , typename MT1 > // Type of the right-hand side matrix operand
930  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
931  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
932  {
933  y.addAssign( x * A );
934  }
936  //**********************************************************************************************
937 
938  //**Vectorized default addition assignment to dense vectors*************************************
952  template< typename VT1 // Type of the left-hand side target vector
953  , typename VT2 // Type of the left-hand side vector operand
954  , typename MT1 > // Type of the right-hand side matrix operand
955  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
956  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
957  {
958  typedef IntrinsicTrait<ElementType> IT;
959 
960  const size_t M( A.rows() );
961  const size_t N( A.columns() );
962 
963  size_t j( 0UL );
964 
965  for( ; (j+8UL) <= N; j+=8UL ) {
966  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
967  for( size_t i=0UL; i<M; i+=IT::size ) {
968  const IntrinsicType x1( x.load(i) );
969  xmm1 = xmm1 + x1 * A.load(i,j );
970  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
971  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
972  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
973  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
974  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
975  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
976  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
977  }
978  y[j ] += sum( xmm1 );
979  y[j+1UL] += sum( xmm2 );
980  y[j+2UL] += sum( xmm3 );
981  y[j+3UL] += sum( xmm4 );
982  y[j+4UL] += sum( xmm5 );
983  y[j+5UL] += sum( xmm6 );
984  y[j+6UL] += sum( xmm7 );
985  y[j+7UL] += sum( xmm8 );
986  }
987  for( ; (j+4UL) <= N; j+=4UL ) {
988  IntrinsicType xmm1, xmm2, xmm3, xmm4;
989  for( size_t i=0UL; i<M; i+=IT::size ) {
990  const IntrinsicType x1( x.load(i) );
991  xmm1 = xmm1 + x1 * A.load(i,j );
992  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
993  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
994  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
995  }
996  y[j ] += sum( xmm1 );
997  y[j+1UL] += sum( xmm2 );
998  y[j+2UL] += sum( xmm3 );
999  y[j+3UL] += sum( xmm4 );
1000  }
1001  for( ; (j+3UL) <= N; j+=3UL ) {
1002  IntrinsicType xmm1, xmm2, xmm3;
1003  for( size_t i=0UL; i<M; i+=IT::size ) {
1004  const IntrinsicType x1( x.load(i) );
1005  xmm1 = xmm1 + x1 * A.load(i,j );
1006  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1007  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1008  }
1009  y[j ] += sum( xmm1 );
1010  y[j+1UL] += sum( xmm2 );
1011  y[j+2UL] += sum( xmm3 );
1012  }
1013  for( ; (j+2UL) <= N; j+=2UL ) {
1014  IntrinsicType xmm1, xmm2;
1015  for( size_t i=0UL; i<M; i+=IT::size ) {
1016  const IntrinsicType x1( x.load(i) );
1017  xmm1 = xmm1 + x1 * A.load(i,j );
1018  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1019  }
1020  y[j ] += sum( xmm1 );
1021  y[j+1UL] += sum( xmm2 );
1022  }
1023  if( j < N ) {
1024  IntrinsicType xmm1;
1025  for( size_t i=0UL; i<M; i+=IT::size ) {
1026  xmm1 = xmm1 + A.load(i,j) * x.load(i);
1027  }
1028  y[j] += sum( xmm1 );
1029  }
1030  }
1032  //**********************************************************************************************
1033 
1034  //**BLAS-based addition assignment to dense vectors (default)***********************************
1048  template< typename VT1 // Type of the left-hand side target vector
1049  , typename VT2 // Type of the left-hand side vector operand
1050  , typename MT1 > // Type of the right-hand side matrix operand
1051  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1052  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1053  {
1054  selectDefaultAddAssignKernel( y, x, A );
1055  }
1057  //**********************************************************************************************
1058 
1059  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1060 #if BLAZE_BLAS_MODE
1061 
// BLAS-based addition assignment kernel for single precision operands ( y += x * A ).
// The update is evaluated by cblas_sgemv as y = 1.0F * A^T * x + 1.0F * y, with A passed
// as a column-major matrix and the transpose flag set.
1074  template< typename VT1 // Type of the left-hand side target vector
1075  , typename VT2 // Type of the left-hand side vector operand
1076  , typename MT1 > // Type of the right-hand side matrix operand
1077  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1078  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1079  {
1080  using boost::numeric_cast;
1081 
1085 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
1086  const int M ( numeric_cast<int>( A.rows() ) );
1087  const int N ( numeric_cast<int>( A.columns() ) );
// A.spacing() is passed as the leading dimension of A.
1088  const int lda( numeric_cast<int>( A.spacing() ) );
1089 
// beta = 1.0F: the product is accumulated onto the existing contents of y.
1090  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
1091  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1092  }
1094 #endif
1095  //**********************************************************************************************
1096 
1097  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1098 #if BLAZE_BLAS_MODE
1099 
// BLAS-based addition assignment kernel for double precision operands ( y += x * A ).
// The update is evaluated by cblas_dgemv as y = 1.0 * A^T * x + 1.0 * y, with A passed
// as a column-major matrix and the transpose flag set.
1112  template< typename VT1 // Type of the left-hand side target vector
1113  , typename VT2 // Type of the left-hand side vector operand
1114  , typename MT1 > // Type of the right-hand side matrix operand
1115  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1116  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1117  {
1118  using boost::numeric_cast;
1119 
1123 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
1124  const int M ( numeric_cast<int>( A.rows() ) );
1125  const int N ( numeric_cast<int>( A.columns() ) );
// A.spacing() is passed as the leading dimension of A.
1126  const int lda( numeric_cast<int>( A.spacing() ) );
1127 
// beta = 1.0: the product is accumulated onto the existing contents of y.
1128  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
1129  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1130  }
1132 #endif
1133  //**********************************************************************************************
1134 
1135  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1136 #if BLAZE_BLAS_MODE
1137 
// BLAS-based addition assignment kernel for single precision complex operands ( y += x * A ).
// The update is evaluated by cblas_cgemv as y = alpha * A^T * x + beta * y with
// alpha = (1,0) and beta = (1,0); CblasTrans is the plain (non-conjugating) transpose flag.
1150  template< typename VT1 // Type of the left-hand side target vector
1151  , typename VT2 // Type of the left-hand side vector operand
1152  , typename MT1 > // Type of the right-hand side matrix operand
1153  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1154  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1155  {
1156  using boost::numeric_cast;
1157 
// Compile-time checks on the underlying value type of the complex element types.
1161  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1162  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1163  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1164 
// The CBLAS interface takes 'int' dimensions; the sizes are converted via numeric_cast.
1165  const int M ( numeric_cast<int>( A.rows() ) );
1166  const int N ( numeric_cast<int>( A.columns() ) );
1167  const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors are passed to cblas_cgemv by address; beta = (1,0) accumulates onto y.
1168  const complex<float> alpha( 1.0F, 0.0F );
1169  const complex<float> beta ( 1.0F, 0.0F );
1170 
1171  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1172  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1173  }
1175 #endif
1176  //**********************************************************************************************
1177 
1178  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1179 #if BLAZE_BLAS_MODE
1180 
// BLAS-based addition assignment kernel, enabled through
// UseDoublePrecisionComplexKernel<VT1,VT2,MT1>.
// Issues one cblas_zgemv call with alpha = (1,0) and beta = (1,0) on the column-major matrix A
// in transposed mode; by the GEMV definition this accumulates the product of x and A onto
// the existing contents of y.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// NOTE(review): the listing numbers jump from 1200 to 1204 inside the body, so some lines of
// the original function are not visible here.
1193  template< typename VT1 // Type of the left-hand side target vector
1194  , typename VT2 // Type of the left-hand side vector operand
1195  , typename MT1 > // Type of the right-hand side matrix operand
1196  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1197  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1198  {
1199  using boost::numeric_cast;
1200
// Compile-time constraints on the value_type of each of the three element types.
1204  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1205  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1206  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1207
// CBLAS takes int arguments, so the dimensions are converted through boost::numeric_cast.
1208  const int M ( numeric_cast<int>( A.rows() ) );
1209  const int N ( numeric_cast<int>( A.columns() ) );
1210  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to cblas_zgemv by address.
1211  const complex<double> alpha( 1.0, 0.0 );
1212  const complex<double> beta ( 1.0, 0.0 );
1213
1214  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1215  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1216  }
1218 #endif
1219  //**********************************************************************************************
1220 
1221  //**Addition assignment to sparse vectors*******************************************************
1222  // No special implementation for the addition assignment to sparse vectors.
1223  //**********************************************************************************************
1224 
1225  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of a transpose dense vector / transpose dense matrix multiplication
// expression to a dense vector.
//   lhs  target dense vector
//   rhs  multiplication expression to be subtracted
// Returns without touching lhs when the matrix operand has no rows or no columns. Otherwise
// both operands are bound to the local objects x (type LT) and A (type RT) and the actual
// work is delegated to selectSubAssignKernel().
1238  template< typename VT1 > // Type of the target dense vector
1239  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1240  {
1242
1243  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1244
// An empty matrix operand contributes nothing to the target vector.
1245  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1246  return;
1247  }
1248
1249  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1250  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1251
// Sanity checks: the evaluated operands must keep the dimensions of the originals.
1252  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1253  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1254  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1255  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1256
1257  TDVecTDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1258  }
1260  //**********************************************************************************************
1261 
1262  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment; this overload is active when
// UseSMPAssignKernel<VT1,VT2,MT1> is not set.
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// The default kernel is used when MT is a computation that is not evaluated
// (IsComputation<MT>::value && !evaluateMatrix) or when the matrix holds fewer than
// TDVECTDMATMULT_THRESHOLD elements (rows * columns); otherwise the BLAS kernel is used.
1273  template< typename VT1 // Type of the left-hand side target vector
1274  , typename VT2 // Type of the left-hand side vector operand
1275  , typename MT1 > // Type of the right-hand side matrix operand
1276  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
1277  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1278  {
1279  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1280  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1281  TDVecTDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1282  else
1283  TDVecTDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
1284  }
1286  //**********************************************************************************************
1287 
1288  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment; this overload is active when
// UseSMPAssignKernel<VT1,VT2,MT1> is set. It rebuilds the product expression from the
// operands x and A and hands it to smpSubAssign().
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
1299  template< typename VT1 // Type of the left-hand side target vector
1300  , typename VT2 // Type of the left-hand side vector operand
1301  , typename MT1 > // Type of the right-hand side matrix operand
1302  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
1303  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1304  {
1305  smpSubAssign( y, x * A );
1306  }
1308  //**********************************************************************************************
1309 
1310  //**Default subtraction assignment to dense vectors*********************************************
// Default subtraction assignment kernel; this overload is active when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> is not set. It forwards the product expression
// x * A to the target vector's own subAssign() member.
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
1324  template< typename VT1 // Type of the left-hand side target vector
1325  , typename VT2 // Type of the left-hand side vector operand
1326  , typename MT1 > // Type of the right-hand side matrix operand
1327  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1328  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1329  {
1330  y.subAssign( x * A );
1331  }
1333  //**********************************************************************************************
1334 
1335  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel; this overload is active when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> is set.
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// The columns of A are processed in groups of 8, then 4, 3 and 2, followed by one final
// single column. For each column j an accumulator collects x.load(i) * A.load(i,j) while i
// advances in steps of IT::size; sum() of the accumulator is then subtracted from y[j].
// NOTE(review): the accumulators are declared without an explicit initializer, so the running
// sums rely on IntrinsicType's default-constructed state (presumably zero; confirm).
// NOTE(review): the inner loops step i by IT::size up to M with no remainder handling;
// presumably the operands are padded to a multiple of IT::size; confirm.
1349  template< typename VT1 // Type of the left-hand side target vector
1350  , typename VT2 // Type of the left-hand side vector operand
1351  , typename MT1 > // Type of the right-hand side matrix operand
1352  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1353  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1354  {
1355  typedef IntrinsicTrait<ElementType> IT;
1356
1357  const size_t M( A.rows() );
1358  const size_t N( A.columns() );
1359
// Index of the next column to process; shared by all loops below.
1360  size_t j( 0UL );
1361
// Eight columns per pass; x.load(i) is loaded once and reused for all eight columns.
1362  for( ; (j+8UL) <= N; j+=8UL ) {
1363  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1364  for( size_t i=0UL; i<M; i+=IT::size ) {
1365  const IntrinsicType x1( x.load(i) );
1366  xmm1 = xmm1 + x1 * A.load(i,j );
1367  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1368  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1369  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1370  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1371  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1372  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1373  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1374  }
1375  y[j ] -= sum( xmm1 );
1376  y[j+1UL] -= sum( xmm2 );
1377  y[j+2UL] -= sum( xmm3 );
1378  y[j+3UL] -= sum( xmm4 );
1379  y[j+4UL] -= sum( xmm5 );
1380  y[j+5UL] -= sum( xmm6 );
1381  y[j+6UL] -= sum( xmm7 );
1382  y[j+7UL] -= sum( xmm8 );
1383  }
// Four columns per pass.
1384  for( ; (j+4UL) <= N; j+=4UL ) {
1385  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1386  for( size_t i=0UL; i<M; i+=IT::size ) {
1387  const IntrinsicType x1( x.load(i) );
1388  xmm1 = xmm1 + x1 * A.load(i,j );
1389  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1390  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1391  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1392  }
1393  y[j ] -= sum( xmm1 );
1394  y[j+1UL] -= sum( xmm2 );
1395  y[j+2UL] -= sum( xmm3 );
1396  y[j+3UL] -= sum( xmm4 );
1397  }
// Three columns per pass.
1398  for( ; (j+3UL) <= N; j+=3UL ) {
1399  IntrinsicType xmm1, xmm2, xmm3;
1400  for( size_t i=0UL; i<M; i+=IT::size ) {
1401  const IntrinsicType x1( x.load(i) );
1402  xmm1 = xmm1 + x1 * A.load(i,j );
1403  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1404  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1405  }
1406  y[j ] -= sum( xmm1 );
1407  y[j+1UL] -= sum( xmm2 );
1408  y[j+2UL] -= sum( xmm3 );
1409  }
// Two columns per pass.
1410  for( ; (j+2UL) <= N; j+=2UL ) {
1411  IntrinsicType xmm1, xmm2;
1412  for( size_t i=0UL; i<M; i+=IT::size ) {
1413  const IntrinsicType x1( x.load(i) );
1414  xmm1 = xmm1 + x1 * A.load(i,j );
1415  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1416  }
1417  y[j ] -= sum( xmm1 );
1418  y[j+1UL] -= sum( xmm2 );
1419  }
// At most one column can be left at this point.
1420  if( j < N ) {
1421  IntrinsicType xmm1;
1422  for( size_t i=0UL; i<M; i+=IT::size ) {
1423  xmm1 = xmm1 + A.load(i,j) * x.load(i);
1424  }
1425  y[j] -= sum( xmm1 );
1426  }
1427  }
1429  //**********************************************************************************************
1430 
1431  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback of the BLAS-based subtraction assignment; this overload is active when
// UseDefaultKernel<VT1,VT2,MT1> is set and simply forwards to selectDefaultSubAssignKernel().
//   y  target vector
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
1445  template< typename VT1 // Type of the left-hand side target vector
1446  , typename VT2 // Type of the left-hand side vector operand
1447  , typename MT1 > // Type of the right-hand side matrix operand
1448  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1449  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1450  {
1451  selectDefaultSubAssignKernel( y, x, A );
1452  }
1454  //**********************************************************************************************
1455 
1456  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1457 #if BLAZE_BLAS_MODE
1458 
// BLAS-based subtraction assignment kernel, enabled through
// UseSinglePrecisionKernel<VT1,VT2,MT1>.
// Issues one cblas_sgemv call with alpha = -1.0F and beta = 1.0F on the column-major matrix A
// in transposed mode; by the GEMV definition this subtracts the product of x and A from
// the existing contents of y.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// NOTE(review): the listing numbers jump from 1478 to 1482 inside the body, so some lines of
// the original function are not visible here.
1471  template< typename VT1 // Type of the left-hand side target vector
1472  , typename VT2 // Type of the left-hand side vector operand
1473  , typename MT1 > // Type of the right-hand side matrix operand
1474  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1475  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1476  {
1477  using boost::numeric_cast;
1478
1482
// CBLAS takes int arguments, so the dimensions are converted through boost::numeric_cast.
1483  const int M ( numeric_cast<int>( A.rows() ) );
1484  const int N ( numeric_cast<int>( A.columns() ) );
// The leading dimension handed to BLAS is A.spacing().
1485  const int lda( numeric_cast<int>( A.spacing() ) );
1486
// The negative alpha turns the accumulation into a subtraction.
1487  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1488  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1489  }
1491 #endif
1492  //**********************************************************************************************
1493 
1494  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1495 #if BLAZE_BLAS_MODE
1496 
// BLAS-based subtraction assignment kernel, enabled through
// UseDoublePrecisionKernel<VT1,VT2,MT1>.
// Issues one cblas_dgemv call with alpha = -1.0 and beta = 1.0 on the column-major matrix A
// in transposed mode; by the GEMV definition this subtracts the product of x and A from
// the existing contents of y.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// NOTE(review): the listing numbers jump from 1516 to 1520 inside the body, so some lines of
// the original function are not visible here.
1509  template< typename VT1 // Type of the left-hand side target vector
1510  , typename VT2 // Type of the left-hand side vector operand
1511  , typename MT1 > // Type of the right-hand side matrix operand
1512  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1513  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1514  {
1515  using boost::numeric_cast;
1516
1520
// CBLAS takes int arguments, so the dimensions are converted through boost::numeric_cast.
1521  const int M ( numeric_cast<int>( A.rows() ) );
1522  const int N ( numeric_cast<int>( A.columns() ) );
// The leading dimension handed to BLAS is A.spacing().
1523  const int lda( numeric_cast<int>( A.spacing() ) );
1524
// The negative alpha turns the accumulation into a subtraction.
1525  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1526  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1527  }
1529 #endif
1530  //**********************************************************************************************
1531 
1532  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1533 #if BLAZE_BLAS_MODE
1534 
// BLAS-based subtraction assignment kernel, enabled through
// UseSinglePrecisionComplexKernel<VT1,VT2,MT1>.
// Issues one cblas_cgemv call with alpha = (-1,0) and beta = (1,0) on the column-major
// matrix A in transposed mode; by the GEMV definition this subtracts the product of x and A
// from the existing contents of y.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// NOTE(review): the listing numbers jump from 1554 to 1558 inside the body, so some lines of
// the original function are not visible here.
1547  template< typename VT1 // Type of the left-hand side target vector
1548  , typename VT2 // Type of the left-hand side vector operand
1549  , typename MT1 > // Type of the right-hand side matrix operand
1550  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1551  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1552  {
1553  using boost::numeric_cast;
1554
// Compile-time constraints on the value_type of each of the three element types.
1558  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1559  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1560  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1561
// CBLAS takes int arguments, so the dimensions are converted through boost::numeric_cast.
1562  const int M ( numeric_cast<int>( A.rows() ) );
1563  const int N ( numeric_cast<int>( A.columns() ) );
1564  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to cblas_cgemv by address; the negative alpha
// turns the accumulation into a subtraction.
1565  const complex<float> alpha( -1.0F, 0.0F );
1566  const complex<float> beta ( 1.0F, 0.0F );
1567
1568  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1569  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1570  }
1572 #endif
1573  //**********************************************************************************************
1574 
1575  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1576 #if BLAZE_BLAS_MODE
1577 
// BLAS-based subtraction assignment kernel, enabled through
// UseDoublePrecisionComplexKernel<VT1,VT2,MT1>.
// Issues one cblas_zgemv call with alpha = (-1,0) and beta = (1,0) on the column-major
// matrix A in transposed mode; by the GEMV definition this subtracts the product of x and A
// from the existing contents of y.
//   y  target vector, updated in place through y.data()
//   x  left-hand side vector operand
//   A  right-hand side matrix operand
// NOTE(review): the listing numbers jump from 1597 to 1601 inside the body, so some lines of
// the original function are not visible here.
1590  template< typename VT1 // Type of the left-hand side target vector
1591  , typename VT2 // Type of the left-hand side vector operand
1592  , typename MT1 > // Type of the right-hand side matrix operand
1593  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1594  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1595  {
1596  using boost::numeric_cast;
1597
// Compile-time constraints on the value_type of each of the three element types.
1601  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1602  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1603  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1604
// CBLAS takes int arguments, so the dimensions are converted through boost::numeric_cast.
1605  const int M ( numeric_cast<int>( A.rows() ) );
1606  const int N ( numeric_cast<int>( A.columns() ) );
1607  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to cblas_zgemv by address; the negative alpha
// turns the accumulation into a subtraction.
1608  const complex<double> alpha( -1.0, 0.0 );
1609  const complex<double> beta ( 1.0, 0.0 );
1610
1611  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1612  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1613  }
1615 #endif
1616  //**********************************************************************************************
1617 
1618  //**Subtraction assignment to sparse vectors****************************************************
1619  // No special implementation for the subtraction assignment to sparse vectors.
1620  //**********************************************************************************************
1621 
1622  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a transpose dense vector / transpose dense matrix
// multiplication expression to a dense vector.
//   lhs  target dense vector
//   rhs  multiplication expression
// The expression is first evaluated into a temporary of type ResultType; the temporary is
// then passed to multAssign() together with the target vector.
// NOTE(review): the listing numbers skip 1638 and 1640-1642 inside the body, so some lines of
// the original function are not visible here.
1635  template< typename VT1 > // Type of the target dense vector
1636  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1637  {
1639
1643
1644  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1645
1646  const ResultType tmp( rhs );
1647  multAssign( ~lhs, tmp );
1648  }
1650  //**********************************************************************************************
1651 
1652  //**Multiplication assignment to sparse vectors*************************************************
1653  // No special implementation for the multiplication assignment to sparse vectors.
1654  //**********************************************************************************************
1655 
1656  //**Compile time checks*************************************************************************
1663  //**********************************************************************************************
1664 };
1665 //*************************************************************************************************
1666 
1667 
1668 
1669 
1670 //=================================================================================================
1671 //
1672 // DVECSCALARMULTEXPR SPECIALIZATION
1673 //
1674 //=================================================================================================
1675 
1676 //*************************************************************************************************
1684 template< typename VT // Type of the left-hand side dense vector
1685  , typename MT // Type of the right-hand side dense matrix
1686  , typename ST > // Type of the right-hand side scalar value
1687 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
1688  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1689  , private VecScalarMultExpr
1690  , private Computation
1691 {
1692  private:
1693  //**Type definitions****************************************************************************
// Shorthand names for the types derived from the template parameters VT (dense vector),
// MT (dense matrix) and the wrapped multiplication expression.
// VMM: the wrapped vector/matrix multiplication expression type.
1694  typedef TDVecTDMatMultExpr<VT,MT> VMM;
// RES: result type of the wrapped multiplication expression.
1695  typedef typename VMM::ResultType RES;
// VRT / MRT: result types of the vector and matrix operand types.
1696  typedef typename VT::ResultType VRT;
1697  typedef typename MT::ResultType MRT;
// VET / MET: element types of the vector and matrix result types.
1698  typedef typename VRT::ElementType VET;
1699  typedef typename MRT::ElementType MET;
// VCT / MCT: composite types of the vector and matrix operand types.
1700  typedef typename VT::CompositeType VCT;
1701  typedef typename MT::CompositeType MCT;
1702  //**********************************************************************************************
1703 
1704  //**********************************************************************************************
// Compile-time switch for the vector operand: set when VT is a computation or requires an
// evaluation. It drives the choice of the LT typedef and the SMP-related flags of this class.
1706  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1707  //**********************************************************************************************
1708 
1709  //**********************************************************************************************
// Compile-time switch for the matrix operand: set when MT requires an evaluation, or when MT
// is a computation whose element type equals the vector element type and is BLAS compatible.
// It drives the choice of the RT typedef and the kernel selection of this class.
1711  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1712  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1713  //**********************************************************************************************
1714 
1715  //**********************************************************************************************
1717 
// Selection trait for the SMP assignment kernels. The four template parameters are not
// inspected; value is set when either operand has to be evaluated
// (evaluateVector || evaluateMatrix).
1719  template< typename T1, typename T2, typename T3, typename T4 >
1720  struct UseSMPAssignKernel {
1721  enum { value = evaluateVector || evaluateMatrix };
1722  };
1723  //**********************************************************************************************
1724 
1725  //**********************************************************************************************
1727 
// Selection trait for the single precision BLAS kernel. value is set when T1, T2 and T3 are
// all vectorizable, their element types all satisfy IsFloat, and T4 is not a complex type.
// The kernel selection functions of this class instantiate it as
// <target vector, vector operand, matrix operand, scalar>.
1730  template< typename T1, typename T2, typename T3, typename T4 >
1731  struct UseSinglePrecisionKernel {
1732  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1733  IsFloat<typename T1::ElementType>::value &&
1734  IsFloat<typename T2::ElementType>::value &&
1735  IsFloat<typename T3::ElementType>::value &&
1736  !IsComplex<T4>::value };
1737  };
1738  //**********************************************************************************************
1739 
1740  //**********************************************************************************************
1742 
// Selection trait for the double precision BLAS kernel. value is set when T1, T2 and T3 are
// all vectorizable, their element types all satisfy IsDouble, and T4 is not a complex type.
// The kernel selection functions of this class instantiate it as
// <target vector, vector operand, matrix operand, scalar>.
1745  template< typename T1, typename T2, typename T3, typename T4 >
1746  struct UseDoublePrecisionKernel {
1747  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1748  IsDouble<typename T1::ElementType>::value &&
1749  IsDouble<typename T2::ElementType>::value &&
1750  IsDouble<typename T3::ElementType>::value &&
1751  !IsComplex<T4>::value };
1752  };
1753  //**********************************************************************************************
1754 
1755  //**********************************************************************************************
1757 
// Selection trait for the single precision complex BLAS kernel. value is set when T1, T2 and
// T3 are all vectorizable and each of their element types is exactly complex<float>.
// Unlike the real-valued traits it takes no scalar type parameter.
1760  template< typename T1, typename T2, typename T3 >
1761  struct UseSinglePrecisionComplexKernel {
1762  typedef complex<float> Type;
1763  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1764  IsSame<typename T1::ElementType,Type>::value &&
1765  IsSame<typename T2::ElementType,Type>::value &&
1766  IsSame<typename T3::ElementType,Type>::value };
1767  };
1768  //**********************************************************************************************
1769 
1770  //**********************************************************************************************
1772 
// Selection trait for the double precision complex BLAS kernel. value is set when T1, T2 and
// T3 are all vectorizable and each of their element types is exactly complex<double>.
// Unlike the real-valued traits it takes no scalar type parameter.
1775  template< typename T1, typename T2, typename T3 >
1776  struct UseDoublePrecisionComplexKernel {
1777  typedef complex<double> Type;
1778  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1779  IsSame<typename T1::ElementType,Type>::value &&
1780  IsSame<typename T2::ElementType,Type>::value &&
1781  IsSame<typename T3::ElementType,Type>::value };
1782  };
1783  //**********************************************************************************************
1784 
1785  //**********************************************************************************************
1787 
// Selection trait for the fallback of the BLAS-based kernels. value is set when
// BLAZE_BLAS_MODE is off, or when none of the four BLAS selection traits (single/double
// precision, real/complex) applies to the given types.
1789  template< typename T1, typename T2, typename T3, typename T4 >
1790  struct UseDefaultKernel {
1791  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1792  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1793  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1794  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1795  };
1796  //**********************************************************************************************
1797 
1798  //**********************************************************************************************
1800 
// Selection trait for the vectorized default kernels. value is set when T1, T2 and T3 are all
// vectorizable, the element types of T1, T2 and T3 and the type T4 are all the same type, and
// IntrinsicTrait reports both addition and multiplication for that element type.
1803  template< typename T1, typename T2, typename T3, typename T4 >
1804  struct UseVectorizedDefaultKernel {
1805  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1806  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1807  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1808  IsSame<typename T1::ElementType,T4>::value &&
1809  IntrinsicTrait<typename T1::ElementType>::addition &&
1810  IntrinsicTrait<typename T1::ElementType>::multiplication };
1811  };
1812  //**********************************************************************************************
1813 
1814  public:
1815  //**Type definitions****************************************************************************
// Type of this DVecScalarMultExpr specialization.
1816  typedef DVecScalarMultExpr<VMM,ST,true> This;
// Result type of the scaled expression, obtained from MultTrait<RES,ST>.
1817  typedef typename MultTrait<RES,ST>::Type ResultType;
// Transpose and element types taken from ResultType.
1818  typedef typename ResultType::TransposeType TransposeType;
1819  typedef typename ResultType::ElementType ElementType;
// Intrinsic (SIMD) type associated with ElementType through IntrinsicTrait.
1820  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// Type returned by the subscript operator.
1821  typedef const ElementType ReturnType;
// Composite type of this expression; declared as a const ResultType.
1822  typedef const ResultType CompositeType;
1823
// Type of the left-hand side operand: the wrapped vector/matrix multiplication expression.
1825  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;
1826
// Type of the right-hand side operand: the scalar.
1828  typedef ST RightOperand;
1829
// Type used for the vector operand inside the assignment functions; selected by
// evaluateVector between const VRT and VCT.
// NOTE(review): presumably const VRT when the flag is set; confirm SelectType's argument order.
1831  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
1832
// Type used for the matrix operand inside the assignment functions; selected by
// evaluateMatrix between const MRT and MCT (same caveat as for LT).
1834  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
1835  //**********************************************************************************************
1836 
1837  //**Compilation flags***************************************************************************
// Compilation flag for vectorization: set when both operand types are vectorizable, the
// vector element type equals both the matrix element type and the scalar type, and
// IntrinsicTrait reports addition and multiplication for that element type.
1839  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
1840  IsSame<VET,MET>::value &&
1841  IsSame<VET,ST>::value &&
1842  IntrinsicTrait<VET>::addition &&
1843  IntrinsicTrait<VET>::multiplication };
1844
// Compilation flag for SMP assignments: set only when neither operand has to be evaluated.
1846  enum { smpAssignable = !evaluateVector && !evaluateMatrix };
1847  //**********************************************************************************************
1848 
1849  //**Constructor*********************************************************************************
// Constructor: stores the wrapped vector/matrix multiplication expression and the scalar.
//   vector  left-hand side multiplication expression
//   scalar  right-hand side scalar
1855  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1856  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1857  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1858  {}
1859  //**********************************************************************************************
1860 
1861  //**Subscript operator**************************************************************************
// Subscript operator: returns element index of the wrapped expression multiplied by the
// scalar. The index is checked against vector_.size() only through BLAZE_INTERNAL_ASSERT.
//   index  access index
1867  inline ReturnType operator[]( size_t index ) const {
1868  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1869  return vector_[index] * scalar_;
1870  }
1871  //**********************************************************************************************
1872 
1873  //**Size function*******************************************************************************
// Returns the size of the expression, which is the size of the wrapped multiplication
// expression.
1878  inline size_t size() const {
1879  return vector_.size();
1880  }
1881  //**********************************************************************************************
1882 
1883  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped vector/matrix multiplication
// expression.
1888  inline LeftOperand leftOperand() const {
1889  return vector_;
1890  }
1891  //**********************************************************************************************
1892 
1893  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar.
1898  inline RightOperand rightOperand() const {
1899  return scalar_;
1900  }
1901  //**********************************************************************************************
1902 
1903  //**********************************************************************************************
// Aliasing query: forwards the given address to canAlias() of the wrapped multiplication
// expression and returns its answer.
//   alias  address to be checked
1909  template< typename T >
1910  inline bool canAlias( const T* alias ) const {
1911  return vector_.canAlias( alias );
1912  }
1913  //**********************************************************************************************
1914 
1915  //**********************************************************************************************
// Aliasing query: forwards the given address to isAliased() of the wrapped multiplication
// expression and returns its answer.
//   alias  address to be checked
1921  template< typename T >
1922  inline bool isAliased( const T* alias ) const {
1923  return vector_.isAliased( alias );
1924  }
1925  //**********************************************************************************************
1926 
1927  //**********************************************************************************************
// Alignment query: returns the answer of isAligned() of the wrapped multiplication expression.
1932  inline bool isAligned() const {
1933  return vector_.isAligned();
1934  }
1935  //**********************************************************************************************
1936 
1937  //**********************************************************************************************
// SMP assignment query. The visible part of the condition holds when BLAS is not parallel, or
// when MT is a computation that is not evaluated, or when the matrix operand holds fewer than
// TDVECTDMATMULT_THRESHOLD elements (rows * columns).
// NOTE(review): listing line 1947 is missing, so the return expression ends in a dangling
// '&&' and the second operand of the conjunction is not visible in this listing; restore it
// from the original header before relying on this text.
1942  inline bool canSMPAssign() const {
1943  typename VMM::RightOperand A( vector_.rightOperand() );
1944  return ( !BLAZE_BLAS_IS_PARALLEL ||
1945  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1946  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) ) &&
1948  }
1949  //**********************************************************************************************
1950 
1951  private:
1952  //**Member variables****************************************************************************
// Left-hand side operand: the wrapped vector/matrix multiplication expression.
1953  LeftOperand vector_;
// Right-hand side operand: the scalar of the multiplication expression.
1954  RightOperand scalar_;
1955  //**********************************************************************************************
1956 
1957  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled transpose dense vector / transpose dense matrix multiplication
// expression to a dense vector.
//   lhs  target dense vector
//   rhs  scaled multiplication expression to be assigned
// With a matrix operand of zero rows the target is reset; with zero columns the function
// returns without touching the target. Otherwise both operands are bound to the local objects
// x (type LT) and A (type RT) and the work is delegated to selectAssignKernel() together with
// the scalar.
1969  template< typename VT1 // Type of the target dense vector
1970  , bool TF > // Transpose flag of the target dense vector
1971  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
1972  {
1974
1975  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1976
// Operands of the wrapped multiplication expression.
1977  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1978  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1979
// No rows means every product is an empty sum, hence the reset of the target.
1980  if( right.rows() == 0UL ) {
1981  reset( ~lhs );
1982  return;
1983  }
1984  else if( right.columns() == 0UL ) {
1985  return;
1986  }
1987
1988  LT x( left ); // Evaluation of the left-hand side dense vector operand
1989  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1990
// Sanity checks: the evaluated operands must keep the dimensions of the originals.
1991  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1992  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1993  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1994  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1995
1996  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
1997  }
1998  //**********************************************************************************************
1999 
2000  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the scaled assignment; this overload is active when
// UseSMPAssignKernel<VT1,VT2,MT1,ST2> is not set.
//   y       target vector
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor
// The default kernel is used when MT is a computation that is not evaluated
// (IsComputation<MT>::value && !evaluateMatrix) or when the matrix holds fewer than
// TDVECTDMATMULT_THRESHOLD elements (rows * columns); otherwise the BLAS kernel is used.
2011  template< typename VT1 // Type of the left-hand side target vector
2012  , typename VT2 // Type of the left-hand side vector operand
2013  , typename MT1 // Type of the right-hand side matrix operand
2014  , typename ST2 > // Type of the scalar value
2015  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2016  selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2017  {
2018  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2019  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2020  DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2021  else
2022  DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
2023  }
2024  //**********************************************************************************************
2025 
2026  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the scaled assignment; this overload is active when
// UseSMPAssignKernel<VT1,VT2,MT1,ST2> is set. It rebuilds the scaled product expression from
// x, A and scalar and hands it to smpAssign().
//   y       target vector
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor
2037  template< typename VT1 // Type of the left-hand side target vector
2038  , typename VT2 // Type of the left-hand side vector operand
2039  , typename MT1 // Type of the right-hand side matrix operand
2040  , typename ST2 > // Type of the scalar value
2041  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2042  selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2043  {
2044  smpAssign( y, x * A * scalar );
2045  }
2046  //**********************************************************************************************
2047 
2048  //**Default assignment to dense vectors*********************************************************
// Default scaled assignment kernel; this overload is active when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is not set. It forwards the scaled product
// expression x * A * scalar to the target vector's own assign() member.
//   y       target vector
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor
2062  template< typename VT1 // Type of the left-hand side target vector
2063  , typename VT2 // Type of the left-hand side vector operand
2064  , typename MT1 // Type of the right-hand side matrix operand
2065  , typename ST2 > // Type of the scalar value
2066  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2067  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2068  {
2069  y.assign( x * A * scalar );
2070  }
2071  //**********************************************************************************************
2072 
2073  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default scaled assignment kernel; this overload is active when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is set.
//   y       target vector
//   x       left-hand side vector operand
//   A       right-hand side matrix operand
//   scalar  scaling factor
// The columns of A are processed in groups of 8, then 4, 3 and 2, followed by one final
// single column. For each column j an accumulator collects x.load(i) * A.load(i,j) while i
// advances in steps of IT::size; sum() of the accumulator times scalar is stored in y[j].
// The scaling is applied once per column, after the reduction.
// NOTE(review): the accumulators are declared without an explicit initializer, so the running
// sums rely on IntrinsicType's default-constructed state (presumably zero; confirm).
// NOTE(review): the inner loops step i by IT::size up to M with no remainder handling;
// presumably the operands are padded to a multiple of IT::size; confirm.
2087  template< typename VT1 // Type of the left-hand side target vector
2088  , typename VT2 // Type of the left-hand side vector operand
2089  , typename MT1 // Type of the right-hand side matrix operand
2090  , typename ST2 > // Type of the scalar value
2091  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2092  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2093  {
2094  typedef IntrinsicTrait<ElementType> IT;
2095
2096  const size_t M( A.rows() );
2097  const size_t N( A.columns() );
2098
// Index of the next column to process; shared by all loops below.
2099  size_t j( 0UL );
2100
// Eight columns per pass; x.load(i) is loaded once and reused for all eight columns.
2101  for( ; (j+8UL) <= N; j+=8UL ) {
2102  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2103  for( size_t i=0UL; i<M; i+=IT::size ) {
2104  const IntrinsicType x1( x.load(i) );
2105  xmm1 = xmm1 + x1 * A.load(i,j );
2106  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2107  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2108  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2109  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2110  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2111  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2112  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2113  }
2114  y[j ] = sum( xmm1 ) * scalar;
2115  y[j+1UL] = sum( xmm2 ) * scalar;
2116  y[j+2UL] = sum( xmm3 ) * scalar;
2117  y[j+3UL] = sum( xmm4 ) * scalar;
2118  y[j+4UL] = sum( xmm5 ) * scalar;
2119  y[j+5UL] = sum( xmm6 ) * scalar;
2120  y[j+6UL] = sum( xmm7 ) * scalar;
2121  y[j+7UL] = sum( xmm8 ) * scalar;
2122  }
// Four columns per pass.
2123  for( ; (j+4UL) <= N; j+=4UL ) {
2124  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2125  for( size_t i=0UL; i<M; i+=IT::size ) {
2126  const IntrinsicType x1( x.load(i) );
2127  xmm1 = xmm1 + x1 * A.load(i,j );
2128  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2129  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2130  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2131  }
2132  y[j ] = sum( xmm1 ) * scalar;
2133  y[j+1UL] = sum( xmm2 ) * scalar;
2134  y[j+2UL] = sum( xmm3 ) * scalar;
2135  y[j+3UL] = sum( xmm4 ) * scalar;
2136  }
// Three columns per pass.
2137  for( ; (j+3UL) <= N; j+=3UL ) {
2138  IntrinsicType xmm1, xmm2, xmm3;
2139  for( size_t i=0UL; i<M; i+=IT::size ) {
2140  const IntrinsicType x1( x.load(i) );
2141  xmm1 = xmm1 + x1 * A.load(i,j );
2142  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2143  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2144  }
2145  y[j ] = sum( xmm1 ) * scalar;
2146  y[j+1UL] = sum( xmm2 ) * scalar;
2147  y[j+2UL] = sum( xmm3 ) * scalar;
2148  }
// Two columns per pass.
2149  for( ; (j+2UL) <= N; j+=2UL ) {
2150  IntrinsicType xmm1, xmm2;
2151  for( size_t i=0UL; i<M; i+=IT::size ) {
2152  const IntrinsicType x1( x.load(i) );
2153  xmm1 = xmm1 + x1 * A.load(i,j );
2154  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2155  }
2156  y[j ] = sum( xmm1 ) * scalar;
2157  y[j+1UL] = sum( xmm2 ) * scalar;
2158  }
// At most one column can be left at this point.
2159  if( j < N ) {
2160  IntrinsicType xmm1;
2161  for( size_t i=0UL; i<M; i+=IT::size ) {
2162  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2163  }
2164  y[j] = sum( xmm1 ) * scalar;
2165  }
2166  }
2167  //**********************************************************************************************
2168 
2169  //**BLAS-based assignment to dense vectors (default)********************************************
      // Fallback overload of the BLAS-based assignment kernel.
      //
      // It participates when UseDefaultKernel<VT1,VT2,MT1,ST2> is satisfied and performs no BLAS
      // call itself: all arguments are forwarded unchanged to selectDefaultAssignKernel().
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2182  template< typename VT1 // Type of the left-hand side target vector
2183  , typename VT2 // Type of the left-hand side vector operand
2184  , typename MT1 // Type of the right-hand side matrix operand
2185  , typename ST2 > // Type of the scalar value
2186  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2187  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2188  {
2189  selectDefaultAssignKernel( y, x, A, scalar );
2190  }
2191  //**********************************************************************************************
2192 
2193  //**BLAS-based assignment to dense vectors (single precision)***********************************
      // Single precision BLAS kernel for the assignment of the scaled vector/matrix product.
      // Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_sgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = 0, i.e. y is overwritten with scalar * A^T * x.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor, passed as alpha.
      // \return void
2194 #if BLAZE_BLAS_MODE
2195 
2208  template< typename VT1 // Type of the left-hand side target vector
2209  , typename VT2 // Type of the left-hand side vector operand
2210  , typename MT1 // Type of the right-hand side matrix operand
2211  , typename ST2 > // Type of the scalar value
2212  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2213  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2214  {
2215  using boost::numeric_cast;
2216 
2220 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2221  const int M ( numeric_cast<int>( A.rows() ) );
2222  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
2223  const int lda( numeric_cast<int>( A.spacing() ) );
2224 
2225  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2226  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2227  }
2228 #endif
2229  //**********************************************************************************************
2230 
2231  //**BLAS-based assignment to dense vectors (double precision)***********************************
      // Double precision BLAS kernel for the assignment of the scaled vector/matrix product.
      // Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_dgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = 0, i.e. y is overwritten with scalar * A^T * x.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor, passed as alpha.
      // \return void
2232 #if BLAZE_BLAS_MODE
2233 
2246  template< typename VT1 // Type of the left-hand side target vector
2247  , typename VT2 // Type of the left-hand side vector operand
2248  , typename MT1 // Type of the right-hand side matrix operand
2249  , typename ST2 > // Type of the scalar value
2250  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2251  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2252  {
2253  using boost::numeric_cast;
2254 
2258 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2259  const int M ( numeric_cast<int>( A.rows() ) );
2260  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
2261  const int lda( numeric_cast<int>( A.spacing() ) );
2262 
2263  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2264  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2265  }
2266 #endif
2267  //**********************************************************************************************
2268 
2269  //**BLAS-based assignment to dense vectors (single precision complex)***************************
      // Single precision complex BLAS kernel for the assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_cgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = (0,0), i.e. y is overwritten with scalar * A^T * x.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; converted to complex<float> before the call.
      // \return void
2270 #if BLAZE_BLAS_MODE
2271 
2285  template< typename VT1 // Type of the left-hand side target vector
2286  , typename VT2 // Type of the left-hand side vector operand
2287  , typename MT1 // Type of the right-hand side matrix operand
2288  , typename ST2 > // Type of the scalar value
2289  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2290  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2291  {
2292  using boost::numeric_cast;
2293 
      // Compile time checks on the value_type of the element types of all three operands.
2297  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2298  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2299  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2300 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2301  const int M ( numeric_cast<int>( A.rows() ) );
2302  const int N ( numeric_cast<int>( A.columns() ) );
2303  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routine receives alpha and beta by address, hence the named locals.
2304  const complex<float> alpha( scalar );
2305  const complex<float> beta ( 0.0F, 0.0F );
2306 
2307  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2308  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2309  }
2310 #endif
2311  //**********************************************************************************************
2312 
2313  //**BLAS-based assignment to dense vectors (double precision complex)***************************
      // Double precision complex BLAS kernel for the assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_zgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = (0,0), i.e. y is overwritten with scalar * A^T * x.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; converted to complex<double> before the call.
      // \return void
2314 #if BLAZE_BLAS_MODE
2315 
2329  template< typename VT1 // Type of the left-hand side target vector
2330  , typename VT2 // Type of the left-hand side vector operand
2331  , typename MT1 // Type of the right-hand side matrix operand
2332  , typename ST2 > // Type of the scalar value
2333  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2334  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2335  {
2336  using boost::numeric_cast;
2337 
      // Compile time checks on the value_type of the element types of all three operands.
2341  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2342  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2343  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2344 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2345  const int M ( numeric_cast<int>( A.rows() ) );
2346  const int N ( numeric_cast<int>( A.columns() ) );
2347  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routine receives alpha and beta by address, hence the named locals.
2348  const complex<double> alpha( scalar );
2349  const complex<double> beta ( 0.0, 0.0 );
2350 
2351  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2352  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2353  }
2354 #endif
2355  //**********************************************************************************************
2356 
2357  //**Assignment to sparse vectors****************************************************************
      // Assignment of the scaled vector/matrix product to a sparse vector.
      //
      // The expression is first evaluated into a temporary of type ResultType, which is then
      // handed to smpAssign() together with the target vector.
      //
      // \param lhs The target sparse vector; its size must equal rhs.size() (internal assertion).
      // \param rhs The scaled multiplication expression to be assigned.
      // \return void
2369  template< typename VT1 // Type of the target sparse vector
2370  , bool TF > // Transpose flag of the target sparse vector
2371  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2372  {
2374 
2378 
2379  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2380 
2381  const ResultType tmp( rhs );
2382  smpAssign( ~lhs, tmp );
2383  }
2384  //**********************************************************************************************
2385 
2386  //**Addition assignment to dense vectors********************************************************
      // Addition assignment of the scaled vector/matrix product to a dense vector.
      //
      // The vector and matrix operands are taken from rhs.vector_. If the matrix operand has no
      // rows or no columns the function returns without modifying lhs. Otherwise both operands
      // are bound to objects of type LT and RT and the work is forwarded to
      // selectAddAssignKernel() together with rhs.scalar_.
      //
      // \param lhs The target dense vector; its size must equal rhs.size() (internal assertion).
      // \param rhs The scaled multiplication expression to be added.
      // \return void
2398  template< typename VT1 // Type of the target dense vector
2399  , bool TF > // Transpose flag of the target dense vector
2400  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2401  {
2403 
2404  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2405 
2406  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2407  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2408 
      // Nothing to add for an empty matrix operand.
2409  if( right.rows() == 0UL || right.columns() == 0UL ) {
2410  return;
2411  }
2412 
2413  LT x( left ); // Evaluation of the left-hand side dense vector operand
2414  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2415 
2416  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2417  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2418  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2419  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2420 
2421  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2422  }
2423  //**********************************************************************************************
2424 
2425  //**Addition assignment to dense vectors (kernel selection)*************************************
      // Kernel selection for the addition assignment when UseSMPAssignKernel<VT1,VT2,MT1,ST2>
      // is not satisfied.
      //
      // The default kernel is chosen when IsComputation<MT>::value holds while evaluateMatrix is
      // false, or when the number of matrix elements (rows * columns) is below the threshold.
      // In all other cases the BLAS-based kernel is chosen.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // NOTE(review): the threshold used is TDVECDMATMULT_THRESHOLD although this file handles
      // the transpose dense matrix case (TDVecTDMat) -- confirm this is the intended constant.
2436  template< typename VT1 // Type of the left-hand side target vector
2437  , typename VT2 // Type of the left-hand side vector operand
2438  , typename MT1 // Type of the right-hand side matrix operand
2439  , typename ST2 > // Type of the scalar value
2440  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2441  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2442  {
2443  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2444  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2445  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2446  else
2447  DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
2448  }
2449  //**********************************************************************************************
2450 
2451  //**Addition assignment to dense vectors (kernel selection)*************************************
      // Kernel selection for the addition assignment when UseSMPAssignKernel<VT1,VT2,MT1,ST2>
      // is satisfied.
      //
      // No local kernel is run; the expression x * A * scalar is rebuilt from the operands and
      // passed to smpAddAssign() with y as the target.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2462  template< typename VT1 // Type of the left-hand side target vector
2463  , typename VT2 // Type of the left-hand side vector operand
2464  , typename MT1 // Type of the right-hand side matrix operand
2465  , typename ST2 > // Type of the scalar value
2466  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2467  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2468  {
2469  smpAddAssign( y, x * A * scalar );
2470  }
2471  //**********************************************************************************************
2472 
2473  //**Default addition assignment to dense vectors************************************************
      // Non-vectorized default kernel for the addition assignment; participates when
      // UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is not satisfied.
      //
      // The expression x * A * scalar is rebuilt from the operands and handed to the
      // addAssign() member function of the target vector.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2487  template< typename VT1 // Type of the left-hand side target vector
2488  , typename VT2 // Type of the left-hand side vector operand
2489  , typename MT1 // Type of the right-hand side matrix operand
2490  , typename ST2 > // Type of the scalar value
2491  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2492  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2493  {
2494  y.addAssign( x * A * scalar );
2495  }
2496  //**********************************************************************************************
2497 
2498  //**Vectorized default addition assignment to dense vectors*************************************
      // Vectorized default kernel for the addition assignment of the scaled vector/matrix
      // product to a dense vector.
      //
      // For every column j of A the kernel accumulates the element-wise products of x and
      // column j in intrinsic (SIMD) accumulators, reduces each accumulator with sum(),
      // multiplies the reduced value by scalar and adds it to y[j].
      //
      // \param y The target vector; y[j] is updated for every j in [0, A.columns()).
      // \param x The left-hand side vector operand, read via x.load(i).
      // \param A The right-hand side matrix operand, read via A.load(i,j).
      // \param scalar The scaling factor applied to every reduced column sum.
      // \return void
      //
      // This overload participates when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is satisfied.
      // NOTE(review): the accumulators are default-constructed and never explicitly cleared;
      // this presumably relies on IntrinsicType initializing to zero -- confirm.
      // NOTE(review): the row loops advance by IT::size up to M without a scalar remainder
      // loop; presumably x and the columns of A are padded accordingly -- confirm.
2512  template< typename VT1 // Type of the left-hand side target vector
2513  , typename VT2 // Type of the left-hand side vector operand
2514  , typename MT1 // Type of the right-hand side matrix operand
2515  , typename ST2 > // Type of the scalar value
2516  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2517  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2518  {
2519  typedef IntrinsicTrait<ElementType> IT;
2520 
2521  const size_t M( A.rows() );
2522  const size_t N( A.columns() );
2523 
      // Index of the next column to process; shared by all column-block loops below.
2524  size_t j( 0UL );
2525 
      // Blocks of eight columns: each load of x is reused for all eight accumulators.
2526  for( ; (j+8UL) <= N; j+=8UL ) {
2527  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2528  for( size_t i=0UL; i<M; i+=IT::size ) {
2529  const IntrinsicType x1( x.load(i) );
2530  xmm1 = xmm1 + x1 * A.load(i,j );
2531  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2532  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2533  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2534  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2535  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2536  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2537  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2538  }
2539  y[j ] += sum( xmm1 ) * scalar;
2540  y[j+1UL] += sum( xmm2 ) * scalar;
2541  y[j+2UL] += sum( xmm3 ) * scalar;
2542  y[j+3UL] += sum( xmm4 ) * scalar;
2543  y[j+4UL] += sum( xmm5 ) * scalar;
2544  y[j+5UL] += sum( xmm6 ) * scalar;
2545  y[j+6UL] += sum( xmm7 ) * scalar;
2546  y[j+7UL] += sum( xmm8 ) * scalar;
2547  }
      // Fewer than eight columns remain here, so this block of four runs at most once.
2548  for( ; (j+4UL) <= N; j+=4UL ) {
2549  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2550  for( size_t i=0UL; i<M; i+=IT::size ) {
2551  const IntrinsicType x1( x.load(i) );
2552  xmm1 = xmm1 + x1 * A.load(i,j );
2553  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2554  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2555  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2556  }
2557  y[j ] += sum( xmm1 ) * scalar;
2558  y[j+1UL] += sum( xmm2 ) * scalar;
2559  y[j+2UL] += sum( xmm3 ) * scalar;
2560  y[j+3UL] += sum( xmm4 ) * scalar;
2561  }
      // Fewer than four columns remain: handle exactly three remaining columns in one pass.
2562  for( ; (j+3UL) <= N; j+=3UL ) {
2563  IntrinsicType xmm1, xmm2, xmm3;
2564  for( size_t i=0UL; i<M; i+=IT::size ) {
2565  const IntrinsicType x1( x.load(i) );
2566  xmm1 = xmm1 + x1 * A.load(i,j );
2567  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2568  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2569  }
2570  y[j ] += sum( xmm1 ) * scalar;
2571  y[j+1UL] += sum( xmm2 ) * scalar;
2572  y[j+2UL] += sum( xmm3 ) * scalar;
2573  }
      // Fewer than three columns remain: handle exactly two remaining columns in one pass.
2574  for( ; (j+2UL) <= N; j+=2UL ) {
2575  IntrinsicType xmm1, xmm2;
2576  for( size_t i=0UL; i<M; i+=IT::size ) {
2577  const IntrinsicType x1( x.load(i) );
2578  xmm1 = xmm1 + x1 * A.load(i,j );
2579  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2580  }
2581  y[j ] += sum( xmm1 ) * scalar;
2582  y[j+1UL] += sum( xmm2 ) * scalar;
2583  }
      // At most a single column is left at this point.
2584  if( j < N ) {
2585  IntrinsicType xmm1;
2586  for( size_t i=0UL; i<M; i+=IT::size ) {
2587  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2588  }
2589  y[j] += sum( xmm1 ) * scalar;
2590  }
2591  }
2592  //**********************************************************************************************
2593 
2594  //**BLAS-based addition assignment to dense vectors (default)***********************************
      // Fallback overload of the BLAS-based addition assignment kernel.
      //
      // It participates when UseDefaultKernel<VT1,VT2,MT1,ST2> is satisfied and performs no BLAS
      // call itself: all arguments are forwarded unchanged to selectDefaultAddAssignKernel().
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2608  template< typename VT1 // Type of the left-hand side target vector
2609  , typename VT2 // Type of the left-hand side vector operand
2610  , typename MT1 // Type of the right-hand side matrix operand
2611  , typename ST2 > // Type of the scalar value
2612  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2613  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2614  {
2615  selectDefaultAddAssignKernel( y, x, A, scalar );
2616  }
2617  //**********************************************************************************************
2618 
2619  //**BLAS-based addition assignment to dense vectors (single precision)**************************
      // Single precision BLAS kernel for the addition assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_sgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = 1, i.e. scalar * A^T * x is added to the existing y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor, passed as alpha.
      // \return void
2620 #if BLAZE_BLAS_MODE
2621 
2634  template< typename VT1 // Type of the left-hand side target vector
2635  , typename VT2 // Type of the left-hand side vector operand
2636  , typename MT1 // Type of the right-hand side matrix operand
2637  , typename ST2 > // Type of the scalar value
2638  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2639  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2640  {
2641  using boost::numeric_cast;
2642 
2646 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2647  const int M ( numeric_cast<int>( A.rows() ) );
2648  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
2649  const int lda( numeric_cast<int>( A.spacing() ) );
2650 
2651  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2652  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2653  }
2654 #endif
2655  //**********************************************************************************************
2656 
2657  //**BLAS-based addition assignment to dense vectors (double precision)**************************
      // Double precision BLAS kernel for the addition assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_dgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = 1, i.e. scalar * A^T * x is added to the existing y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor, passed as alpha.
      // \return void
2658 #if BLAZE_BLAS_MODE
2659 
2672  template< typename VT1 // Type of the left-hand side target vector
2673  , typename VT2 // Type of the left-hand side vector operand
2674  , typename MT1 // Type of the right-hand side matrix operand
2675  , typename ST2 > // Type of the scalar value
2676  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2677  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2678  {
2679  using boost::numeric_cast;
2680 
2684 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2685  const int M ( numeric_cast<int>( A.rows() ) );
2686  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
2687  const int lda( numeric_cast<int>( A.spacing() ) );
2688 
2689  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2690  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2691  }
2692 #endif
2693  //**********************************************************************************************
2694 
2695  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
      // Single precision complex BLAS kernel for the addition assignment of the scaled
      // vector/matrix product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_cgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = (1,0), i.e. scalar * A^T * x is added to the existing y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; converted to complex<float> before the call.
      // \return void
2696 #if BLAZE_BLAS_MODE
2697 
2711  template< typename VT1 // Type of the left-hand side target vector
2712  , typename VT2 // Type of the left-hand side vector operand
2713  , typename MT1 // Type of the right-hand side matrix operand
2714  , typename ST2 > // Type of the scalar value
2715  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2716  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2717  {
2718  using boost::numeric_cast;
2719 
      // Compile time checks on the value_type of the element types of all three operands.
2723  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2724  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2725  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2726 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2727  const int M ( numeric_cast<int>( A.rows() ) );
2728  const int N ( numeric_cast<int>( A.columns() ) );
2729  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routine receives alpha and beta by address, hence the named locals.
2730  const complex<float> alpha( scalar );
2731  const complex<float> beta ( 1.0F, 0.0F );
2732 
2733  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2734  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2735  }
2736 #endif
2737  //**********************************************************************************************
2738 
2739  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
      // Double precision complex BLAS kernel for the addition assignment of the scaled
      // vector/matrix product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_zgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = scalar and beta = (1,0), i.e. scalar * A^T * x is added to the existing y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; converted to complex<double> before the call.
      // \return void
2740 #if BLAZE_BLAS_MODE
2741 
2755  template< typename VT1 // Type of the left-hand side target vector
2756  , typename VT2 // Type of the left-hand side vector operand
2757  , typename MT1 // Type of the right-hand side matrix operand
2758  , typename ST2 > // Type of the scalar value
2759  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2760  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2761  {
2762  using boost::numeric_cast;
2763 
      // Compile time checks on the value_type of the element types of all three operands.
2767  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2768  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2769  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2770 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
2771  const int M ( numeric_cast<int>( A.rows() ) );
2772  const int N ( numeric_cast<int>( A.columns() ) );
2773  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routine receives alpha and beta by address, hence the named locals.
2774  const complex<double> alpha( scalar );
2775  const complex<double> beta ( 1.0, 0.0 );
2776 
2777  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2778  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2779  }
2780 #endif
2781  //**********************************************************************************************
2782 
2783  //**Addition assignment to sparse vectors*******************************************************
2784  // No special implementation for the addition assignment to sparse vectors.
2785  //**********************************************************************************************
2786 
2787  //**Subtraction assignment to dense vectors*****************************************************
      // Subtraction assignment of the scaled vector/matrix product to a dense vector.
      //
      // The vector and matrix operands are taken from rhs.vector_. If the matrix operand has no
      // rows or no columns the function returns without modifying lhs. Otherwise both operands
      // are bound to objects of type LT and RT and the work is forwarded to
      // selectSubAssignKernel() together with rhs.scalar_.
      //
      // \param lhs The target dense vector; its size must equal rhs.size() (internal assertion).
      // \param rhs The scaled multiplication expression to be subtracted.
      // \return void
2799  template< typename VT1 // Type of the target dense vector
2800  , bool TF > // Transpose flag of the target dense vector
2801  friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2802  {
2804 
2805  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2806 
2807  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2808  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2809 
      // Nothing to subtract for an empty matrix operand.
2810  if( right.rows() == 0UL || right.columns() == 0UL ) {
2811  return;
2812  }
2813 
2814  LT x( left ); // Evaluation of the left-hand side dense vector operand
2815  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2816 
2817  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2818  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2819  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2820  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2821 
2822  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2823  }
2824  //**********************************************************************************************
2825 
2826  //**Subtraction assignment to dense vectors (kernel selection)**********************************
      // Kernel selection for the subtraction assignment when UseSMPAssignKernel<VT1,VT2,MT1,ST2>
      // is not satisfied.
      //
      // The default kernel is chosen when IsComputation<MT>::value holds while evaluateMatrix is
      // false, or when the number of matrix elements (rows * columns) is below the threshold.
      // In all other cases the BLAS-based kernel is chosen.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // NOTE(review): the threshold used is TDVECDMATMULT_THRESHOLD although this file handles
      // the transpose dense matrix case (TDVecTDMat) -- confirm this is the intended constant.
2837  template< typename VT1 // Type of the left-hand side target vector
2838  , typename VT2 // Type of the left-hand side vector operand
2839  , typename MT1 // Type of the right-hand side matrix operand
2840  , typename ST2 > // Type of the scalar value
2841  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2842  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2843  {
2844  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2845  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2846  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
2847  else
2848  DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
2849  }
2850  //**********************************************************************************************
2851 
2852  //**Subtraction assignment to dense vectors (kernel selection)**********************************
      // Kernel selection for the subtraction assignment when UseSMPAssignKernel<VT1,VT2,MT1,ST2>
      // is satisfied.
      //
      // No local kernel is run; the expression x * A * scalar is rebuilt from the operands and
      // passed to smpSubAssign() with y as the target.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2863  template< typename VT1 // Type of the left-hand side target vector
2864  , typename VT2 // Type of the left-hand side vector operand
2865  , typename MT1 // Type of the right-hand side matrix operand
2866  , typename ST2 > // Type of the scalar value
2867  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2868  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2869  {
2870  smpSubAssign( y, x * A * scalar );
2871  }
2872  //**********************************************************************************************
2873 
2874  //**Default subtraction assignment to dense vectors*********************************************
      // Non-vectorized default kernel for the subtraction assignment; participates when
      // UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is not satisfied.
      //
      // The expression x * A * scalar is rebuilt from the operands and handed to the
      // subAssign() member function of the target vector.
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
2888  template< typename VT1 // Type of the left-hand side target vector
2889  , typename VT2 // Type of the left-hand side vector operand
2890  , typename MT1 // Type of the right-hand side matrix operand
2891  , typename ST2 > // Type of the scalar value
2892  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2893  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2894  {
2895  y.subAssign( x * A * scalar );
2896  }
2897  //**********************************************************************************************
2898 
2899  //**Vectorized default subtraction assignment to dense vectors**********************************
      // Vectorized default kernel for the subtraction assignment of the scaled vector/matrix
      // product to a dense vector.
      //
      // For every column j of A the kernel accumulates the element-wise products of x and
      // column j in intrinsic (SIMD) accumulators, reduces each accumulator with sum(),
      // multiplies the reduced value by scalar and subtracts it from y[j].
      //
      // \param y The target vector; y[j] is updated for every j in [0, A.columns()).
      // \param x The left-hand side vector operand, read via x.load(i).
      // \param A The right-hand side matrix operand, read via A.load(i,j).
      // \param scalar The scaling factor applied to every reduced column sum.
      // \return void
      //
      // This overload participates when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is satisfied.
      // NOTE(review): the accumulators are default-constructed and never explicitly cleared;
      // this presumably relies on IntrinsicType initializing to zero -- confirm.
      // NOTE(review): the row loops advance by IT::size up to M without a scalar remainder
      // loop; presumably x and the columns of A are padded accordingly -- confirm.
2913  template< typename VT1 // Type of the left-hand side target vector
2914  , typename VT2 // Type of the left-hand side vector operand
2915  , typename MT1 // Type of the right-hand side matrix operand
2916  , typename ST2 > // Type of the scalar value
2917  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2918  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2919  {
2920  typedef IntrinsicTrait<ElementType> IT;
2921 
2922  const size_t M( A.rows() );
2923  const size_t N( A.columns() );
2924 
      // Index of the next column to process; shared by all column-block loops below.
2925  size_t j( 0UL );
2926 
      // Blocks of eight columns: each load of x is reused for all eight accumulators.
2927  for( ; (j+8UL) <= N; j+=8UL ) {
2928  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2929  for( size_t i=0UL; i<M; i+=IT::size ) {
2930  const IntrinsicType x1( x.load(i) );
2931  xmm1 = xmm1 + x1 * A.load(i,j );
2932  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2933  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2934  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2935  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2936  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2937  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2938  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2939  }
2940  y[j ] -= sum( xmm1 ) * scalar;
2941  y[j+1UL] -= sum( xmm2 ) * scalar;
2942  y[j+2UL] -= sum( xmm3 ) * scalar;
2943  y[j+3UL] -= sum( xmm4 ) * scalar;
2944  y[j+4UL] -= sum( xmm5 ) * scalar;
2945  y[j+5UL] -= sum( xmm6 ) * scalar;
2946  y[j+6UL] -= sum( xmm7 ) * scalar;
2947  y[j+7UL] -= sum( xmm8 ) * scalar;
2948  }
      // Fewer than eight columns remain here, so this block of four runs at most once.
2949  for( ; (j+4UL) <= N; j+=4UL ) {
2950  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2951  for( size_t i=0UL; i<M; i+=IT::size ) {
2952  const IntrinsicType x1( x.load(i) );
2953  xmm1 = xmm1 + x1 * A.load(i,j );
2954  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2955  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2956  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2957  }
2958  y[j ] -= sum( xmm1 ) * scalar;
2959  y[j+1UL] -= sum( xmm2 ) * scalar;
2960  y[j+2UL] -= sum( xmm3 ) * scalar;
2961  y[j+3UL] -= sum( xmm4 ) * scalar;
2962  }
      // Fewer than four columns remain: handle exactly three remaining columns in one pass.
2963  for( ; (j+3UL) <= N; j+=3UL ) {
2964  IntrinsicType xmm1, xmm2, xmm3;
2965  for( size_t i=0UL; i<M; i+=IT::size ) {
2966  const IntrinsicType x1( x.load(i) );
2967  xmm1 = xmm1 + x1 * A.load(i,j );
2968  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2969  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2970  }
2971  y[j ] -= sum( xmm1 ) * scalar;
2972  y[j+1UL] -= sum( xmm2 ) * scalar;
2973  y[j+2UL] -= sum( xmm3 ) * scalar;
2974  }
      // Fewer than three columns remain: handle exactly two remaining columns in one pass.
2975  for( ; (j+2UL) <= N; j+=2UL ) {
2976  IntrinsicType xmm1, xmm2;
2977  for( size_t i=0UL; i<M; i+=IT::size ) {
2978  const IntrinsicType x1( x.load(i) );
2979  xmm1 = xmm1 + x1 * A.load(i,j );
2980  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2981  }
2982  y[j ] -= sum( xmm1 ) * scalar;
2983  y[j+1UL] -= sum( xmm2 ) * scalar;
2984  }
      // At most a single column is left at this point.
2985  if( j < N ) {
2986  IntrinsicType xmm1;
2987  for( size_t i=0UL; i<M; i+=IT::size ) {
2988  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2989  }
2990  y[j] -= sum( xmm1 ) * scalar;
2991  }
2992  }
2993  //**********************************************************************************************
2994 
2995  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Fallback overload of the BLAS-based subtraction assignment kernel.
      //
      // It participates when UseDefaultKernel<VT1,VT2,MT1,ST2> is satisfied and performs no BLAS
      // call itself: all arguments are forwarded unchanged to selectDefaultSubAssignKernel().
      //
      // \param y The target vector.
      // \param x The left-hand side vector operand.
      // \param A The right-hand side matrix operand.
      // \param scalar The scaling factor.
      // \return void
3010  template< typename VT1 // Type of the left-hand side target vector
3011  , typename VT2 // Type of the left-hand side vector operand
3012  , typename MT1 // Type of the right-hand side matrix operand
3013  , typename ST2 > // Type of the scalar value
3014  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3015  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3016  {
3017  selectDefaultSubAssignKernel( y, x, A, scalar );
3018  }
3019  //**********************************************************************************************
3020 
3021  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
      // Single precision BLAS kernel for the subtraction assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_sgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = -scalar and beta = 1, i.e. scalar * A^T * x is subtracted from y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; its negation is passed as alpha.
      // \return void
3022 #if BLAZE_BLAS_MODE
3023 
3036  template< typename VT1 // Type of the left-hand side target vector
3037  , typename VT2 // Type of the left-hand side vector operand
3038  , typename MT1 // Type of the right-hand side matrix operand
3039  , typename ST2 > // Type of the scalar value
3040  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3041  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3042  {
3043  using boost::numeric_cast;
3044 
3048 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
3049  const int M ( numeric_cast<int>( A.rows() ) );
3050  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
3051  const int lda( numeric_cast<int>( A.spacing() ) );
3052 
      // The subtraction is expressed through a negated alpha combined with beta = 1.
3053  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
3054  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3055  }
3056 #endif
3057  //**********************************************************************************************
3058 
3059  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
      // Double precision BLAS kernel for the subtraction assignment of the scaled vector/matrix
      // product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is satisfied.
      //
      // Issues one cblas_dgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = -scalar and beta = 1, i.e. scalar * A^T * x is subtracted from y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; its negation is passed as alpha.
      // \return void
3060 #if BLAZE_BLAS_MODE
3061 
3074  template< typename VT1 // Type of the left-hand side target vector
3075  , typename VT2 // Type of the left-hand side vector operand
3076  , typename MT1 // Type of the right-hand side matrix operand
3077  , typename ST2 > // Type of the scalar value
3078  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3079  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3080  {
3081  using boost::numeric_cast;
3082 
3086 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
3087  const int M ( numeric_cast<int>( A.rows() ) );
3088  const int N ( numeric_cast<int>( A.columns() ) );
      // A.spacing() is passed as the leading dimension (lda) of the matrix storage.
3089  const int lda( numeric_cast<int>( A.spacing() ) );
3090 
      // The subtraction is expressed through a negated alpha combined with beta = 1.
3091  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
3092  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3093  }
3094 #endif
3095  //**********************************************************************************************
3096 
3097  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
      // Single precision complex BLAS kernel for the subtraction assignment of the scaled
      // vector/matrix product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_cgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = -scalar and beta = (1,0), i.e. scalar * A^T * x is subtracted from y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; negated and converted to complex<float>.
      // \return void
3098 #if BLAZE_BLAS_MODE
3099 
3114  template< typename VT1 // Type of the left-hand side target vector
3115  , typename VT2 // Type of the left-hand side vector operand
3116  , typename MT1 // Type of the right-hand side matrix operand
3117  , typename ST2 > // Type of the scalar value
3118  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3119  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3120  {
3121  using boost::numeric_cast;
3122 
      // Compile time checks on the value_type of the element types of all three operands.
3126  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3127  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3128  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3129 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
3130  const int M ( numeric_cast<int>( A.rows() ) );
3131  const int N ( numeric_cast<int>( A.columns() ) );
3132  const int lda( numeric_cast<int>( A.spacing() ) );
      // Negated alpha with beta = 1 turns the gemv update into a subtraction; both are passed
      // by address.
3133  const complex<float> alpha( -scalar );
3134  const complex<float> beta ( 1.0F, 0.0F );
3135 
3136  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
3137  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3138  }
3139 #endif
3140  //**********************************************************************************************
3141 
3142  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
      // Double precision complex BLAS kernel for the subtraction assignment of the scaled
      // vector/matrix product. Only compiled when BLAZE_BLAS_MODE is non-zero; participates when
      // UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is satisfied.
      //
      // Issues one cblas_zgemv call (column-major layout, transposed operation, unit increments)
      // with alpha = -scalar and beta = (1,0), i.e. scalar * A^T * x is subtracted from y.
      //
      // \param y The target vector; accessed through y.data().
      // \param x The left-hand side vector operand; accessed through x.data().
      // \param A The right-hand side matrix operand; accessed through A.data().
      // \param scalar The scaling factor; negated and converted to complex<double>.
      // \return void
3143 #if BLAZE_BLAS_MODE
3144 
3159  template< typename VT1 // Type of the left-hand side target vector
3160  , typename VT2 // Type of the left-hand side vector operand
3161  , typename MT1 // Type of the right-hand side matrix operand
3162  , typename ST2 > // Type of the scalar value
3163  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3164  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3165  {
3166  using boost::numeric_cast;
3167 
      // Compile time checks on the value_type of the element types of all three operands.
3171  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3172  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3173  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3174 
      // The CBLAS interface takes int dimensions; numeric_cast performs a range-checked conversion.
3175  const int M ( numeric_cast<int>( A.rows() ) );
3176  const int N ( numeric_cast<int>( A.columns() ) );
3177  const int lda( numeric_cast<int>( A.spacing() ) );
      // Negated alpha with beta = 1 turns the gemv update into a subtraction; both are passed
      // by address.
3178  const complex<double> alpha( -scalar );
3179  const complex<double> beta ( 1.0, 0.0 );
3180 
3181  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
3182  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3183  }
3184 #endif
3185  //**********************************************************************************************
3186 
3187  //**Subtraction assignment to sparse vectors****************************************************
3188  // No special implementation for the subtraction assignment to sparse vectors.
3189  //**********************************************************************************************
3190 
3191  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled multiplication expression rhs to the dense vector lhs.
// The expression is first evaluated into a temporary of type ResultType; the temporary is then
// forwarded to the multAssign overload for the concrete target vector (~lhs).
//
// lhs : target dense vector
// rhs : right-hand side expression to be multiplied into lhs
//
// NOTE(review): listing lines 3207 and 3209-3211 are absent from this extract; confirm against
// the original header that nothing relevant is missing between the brace and the assertion.
3203  template< typename VT1 // Type of the target dense vector
3204  , bool TF > // Transpose flag of the target dense vector
3205  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
3206  {
3208 
3212 
// Internal sanity check: target and expression must have the same size.
3213  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3214 
// Evaluate the complete expression once into a temporary before touching the target.
3215  const ResultType tmp( rhs );
3216  multAssign( ~lhs, tmp );
3217  }
3218  //**********************************************************************************************
3219 
3220  //**Multiplication assignment to sparse vectors*******************************************************
3221  // No special implementation for the multiplication assignment to sparse vectors.
3222  //**********************************************************************************************
3223 
3224  //**Compile time checks*************************************************************************
3233  //**********************************************************************************************
3234 };
3236 //*************************************************************************************************
3237 
3238 
3239 
3240 
3241 //=================================================================================================
3242 //
3243 // GLOBAL BINARY ARITHMETIC OPERATORS
3244 //
3245 //=================================================================================================
3246 
3247 //*************************************************************************************************
// Global multiplication operator creating a TDVecTDMatMultExpr from a dense vector (T1) and a
// dense matrix (T2).
//
// The DisableIf return type removes this overload when T2 is a matrix/matrix multiplication
// expression (IsMatMatMultExpr<T2>).
//
// Throws std::invalid_argument if the size of vec does not match the number of rows of mat.
// Returns the expression object built from the two operands.
//
// NOTE(review): the declarator line (listing line 3281, presumably the operator name and the
// vec/mat parameters) and listing line 3283 are missing from this extract; confirm against the
// original header.
3278 template< typename T1 // Type of the left-hand side dense vector
3279  , typename T2 > // Type of the right-hand side dense matrix
3280 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
3282 {
3284 
// Run time check of the operand dimensions.
3285  if( (~vec).size() != (~mat).rows() )
3286  throw std::invalid_argument( "Vector and matrix sizes do not match" );
3287 
3288  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
3289 }
3290 //*************************************************************************************************
3291 
3292 
3293 
3294 
3295 //=================================================================================================
3296 //
3297 // EXPRESSION TRAIT SPECIALIZATIONS
3298 //
3299 //=================================================================================================
3300 
3301 //*************************************************************************************************
// Specialization of SubvectorExprTrait for TDVecTDMatMultExpr<VT,MT>.
// The nested Type is the MultExprTrait result for VT and the SubmatrixExprTrait type of
// const MT (instantiated with the same AF parameter).
3303 template< typename VT, typename MT, bool AF >
3304 struct SubvectorExprTrait< TDVecTDMatMultExpr<VT,MT>, AF >
3305 {
3306  public:
3307  //**********************************************************************************************
3308  typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
3309  //**********************************************************************************************
3310 };
3312 //*************************************************************************************************
3313 
3314 } // namespace blaze
3315 
3316 #endif
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:251
Expression object for transpose dense vector-transpose dense matrix multiplications. The TDVecTDMatMultExpr class represents the compile time expression for multiplications between transpose dense vectors and column-major dense matrices.
Definition: Forward.h:131
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecTDMatMultExpr.h:398
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:332
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDVecTDMatMultExpr.h:342
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4075
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the IsSame and IsStrictlySame type traits.
const size_t TDVECTDMATMULT_THRESHOLD
Dense Vector/column-major dense matrix multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:108
Constraint on the data type.
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:116
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:114
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecTDMatMultExpr.h:260
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
const size_t SMP_TDVECTDMATMULT_THRESHOLD
SMP dense vector/column-major dense matrix multiplication threshold.This threshold represents the sys...
Definition: Thresholds.h:178
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:254
Header file for the RequiresEvaluation type trait.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:251
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the dense vector SMP implementation.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecTDMatMultExpr.h:376
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecTDMatMultExpr.h:354
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecTDMatMultExpr.h:245
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
TDVecTDMatMultExpr< VT, MT > This
Type of this TDVecTDMatMultExpr instance.
Definition: TDVecTDMatMultExpr.h:242
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:115
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecTDMatMultExpr.h:246
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:257
Constraints on the storage order of matrix types.
Constraint on the data type.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:112
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:111
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecTDMatMultExpr.h:295
Header file for the EnableIf class template.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
Header file for the SubmatrixExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecTDMatMultExpr.h:386
System settings for the BLAS mode.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:396
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecTDMatMultExpr.h:248
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Header file for the TVecMatMultExpr base class.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:247
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:113
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:244
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:397
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecTDMatMultExpr.h:366
Header file for the IsComputation type trait class.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:243
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Header file for the sparse vector SMP implementation.
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
TDVecTDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecTDMatMultExpr class.
Definition: TDVecTDMatMultExpr.h:280
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:138
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecTDMatMultExpr.h:322
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.