All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
66 #include <blaze/system/BLAS.h>
68 #include <blaze/util/Assert.h>
69 #include <blaze/util/Complex.h>
75 #include <blaze/util/DisableIf.h>
76 #include <blaze/util/EnableIf.h>
78 #include <blaze/util/SelectType.h>
79 #include <blaze/util/Types.h>
85 
86 
87 namespace blaze {
88 
89 //=================================================================================================
90 //
91 // CLASS TDVECDMATMULTEXPR
92 //
93 //=================================================================================================
94 
95 //*************************************************************************************************
102 template< typename VT // Type of the left-hand side dense vector
103  , typename MT > // Type of the right-hand side dense matrix
104 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
105  , private TVecMatMultExpr
106  , private Computation
107 {
108  private:
109  //**Type definitions****************************************************************************
110  typedef typename VT::ResultType VRT;
111  typedef typename MT::ResultType MRT;
112  typedef typename VRT::ElementType VET;
113  typedef typename MRT::ElementType MET;
114  typedef typename VT::CompositeType VCT;
115  typedef typename MT::CompositeType MCT;
116  //**********************************************************************************************
117 
118  //**********************************************************************************************
120  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
121  //**********************************************************************************************
122 
123  //**********************************************************************************************
125  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
127  //**********************************************************************************************
128 
129  //**********************************************************************************************
131 
135  template< typename T1 >
136  struct UseSMPAssign {
137  enum { value = ( evaluateVector || evaluateMatrix ) };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
151  IsFloat<typename T1::ElementType>::value &&
152  IsFloat<typename T2::ElementType>::value &&
153  IsFloat<typename T3::ElementType>::value };
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167  IsDouble<typename T1::ElementType>::value &&
168  IsDouble<typename T2::ElementType>::value &&
169  IsDouble<typename T3::ElementType>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184  IsSame<typename T1::ElementType,Type>::value &&
185  IsSame<typename T2::ElementType,Type>::value &&
186  IsSame<typename T3::ElementType,Type>::value };
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201  IsSame<typename T1::ElementType,Type>::value &&
202  IsSame<typename T2::ElementType,Type>::value &&
203  IsSame<typename T3::ElementType,Type>::value };
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234  IntrinsicTrait<typename T1::ElementType>::addition &&
235  IntrinsicTrait<typename T1::ElementType>::multiplication };
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
247  typedef const ElementType ReturnType;
248  typedef const ResultType CompositeType;
249 
251  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
252 
254  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
265  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
269 
271  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
272  !evaluateMatrix && MT::smpAssignable };
273  //**********************************************************************************************
274 
275  //**Constructor*********************************************************************************
281  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
282  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
283  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
284  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
285  {
286  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
287  }
288  //**********************************************************************************************
289 
290  //**Subscript operator**************************************************************************
296  inline ReturnType operator[]( size_t index ) const {
297  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
298 
299  ElementType res;
300 
301  if( mat_.rows() != 0UL ) {
302  res = vec_[0UL] * mat_(0UL,index);
303  for( size_t j=1UL; j<end_; j+=2UL ) {
304  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
305  }
306  if( end_ < mat_.rows() ) {
307  res += vec_[end_] * mat_(end_,index);
308  }
309  }
310  else {
311  reset( res );
312  }
313 
314  return res;
315  }
316  //**********************************************************************************************
317 
318  //**Size function*******************************************************************************
323  inline size_t size() const {
324  return mat_.columns();
325  }
326  //**********************************************************************************************
327 
328  //**Left operand access*************************************************************************
333  inline LeftOperand leftOperand() const {
334  return vec_;
335  }
336  //**********************************************************************************************
337 
338  //**Right operand access************************************************************************
343  inline RightOperand rightOperand() const {
344  return mat_;
345  }
346  //**********************************************************************************************
347 
348  //**********************************************************************************************
354  template< typename T >
355  inline bool canAlias( const T* alias ) const {
356  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
357  }
358  //**********************************************************************************************
359 
360  //**********************************************************************************************
366  template< typename T >
367  inline bool isAliased( const T* alias ) const {
368  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
369  }
370  //**********************************************************************************************
371 
372  //**********************************************************************************************
377  inline bool isAligned() const {
378  return vec_.isAligned() && mat_.isAligned();
379  }
380  //**********************************************************************************************
381 
382  //**********************************************************************************************
387  inline bool canSMPAssign() const {
388  return ( !BLAZE_BLAS_IS_PARALLEL ||
389  ( IsComputation<MT>::value && !evaluateMatrix ) ||
390  ( mat_.rows() * mat_.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
392  }
393  //**********************************************************************************************
394 
395  private:
396  //**Member variables****************************************************************************
399  const size_t end_;
400  //**********************************************************************************************
401 
402  //**Assignment to dense vectors*****************************************************************
415  template< typename VT1 > // Type of the target dense vector
416  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
417  {
419 
420  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
421 
422  if( rhs.mat_.rows() == 0UL ) {
423  reset( ~lhs );
424  return;
425  }
426  else if( rhs.mat_.columns() == 0UL ) {
427  return;
428  }
429 
430  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
431  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
432 
433  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
434  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
435  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
436  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
437 
438  TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
439  }
441  //**********************************************************************************************
442 
443  //**Assignment to dense vectors (kernel selection)**********************************************
454  template< typename VT1 // Type of the left-hand side target vector
455  , typename VT2 // Type of the left-hand side vector operand
456  , typename MT1 > // Type of the right-hand side matrix operand
457  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
461  TDVecDMatMultExpr::selectDefaultAssignKernel( y, x, A );
462  else
463  TDVecDMatMultExpr::selectBlasAssignKernel( y, x, A );
464  }
466  //**********************************************************************************************
467 
468  //**Default assignment to dense vectors*********************************************************
482  template< typename VT1 // Type of the left-hand side target vector
483  , typename VT2 // Type of the left-hand side vector operand
484  , typename MT1 > // Type of the right-hand side matrix operand
485  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
486  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
487  {
488  const size_t M( A.rows() );
489  const size_t N( A.columns() );
490 
491  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
492  const size_t jend( N & size_t(-2) );
493 
494  for( size_t j=0UL; j<N; ++j ) {
495  y[j] = x[0UL] * A(0UL,j);
496  }
497  for( size_t i=1UL; i<M; ++i ) {
498  for( size_t j=0UL; j<jend; j+=2UL ) {
499  y[j ] += x[i] * A(i,j );
500  y[j+1UL] += x[i] * A(i,j+1UL);
501  }
502  if( jend < N ) {
503  y[jend] += x[i] * A(i,jend);
504  }
505  }
506  }
508  //**********************************************************************************************
509 
510  //**Vectorized default assignment to dense vectors**********************************************
524  template< typename VT1 // Type of the left-hand side target vector
525  , typename VT2 // Type of the left-hand side vector operand
526  , typename MT1 > // Type of the right-hand side matrix operand
527  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
528  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
529  {
530  typedef IntrinsicTrait<ElementType> IT;
531 
532  const size_t M( A.rows() );
533  const size_t N( A.columns() );
534 
535  size_t j( 0UL );
536 
537  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
538  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
539  for( size_t i=0UL; i<M; ++i ) {
540  const IntrinsicType x1( set( x[i] ) );
541  xmm1 = xmm1 + x1 * A.load(i,j );
542  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
543  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
544  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
545  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
546  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
547  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
548  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
549  }
550  y.store( j , xmm1 );
551  y.store( j+IT::size , xmm2 );
552  y.store( j+IT::size*2UL, xmm3 );
553  y.store( j+IT::size*3UL, xmm4 );
554  y.store( j+IT::size*4UL, xmm5 );
555  y.store( j+IT::size*5UL, xmm6 );
556  y.store( j+IT::size*6UL, xmm7 );
557  y.store( j+IT::size*7UL, xmm8 );
558  }
559  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
560  IntrinsicType xmm1, xmm2, xmm3, xmm4;
561  for( size_t i=0UL; i<M; ++i ) {
562  const IntrinsicType x1( set( x[i] ) );
563  xmm1 = xmm1 + x1 * A.load(i,j );
564  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
565  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
566  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
567  }
568  y.store( j , xmm1 );
569  y.store( j+IT::size , xmm2 );
570  y.store( j+IT::size*2UL, xmm3 );
571  y.store( j+IT::size*3UL, xmm4 );
572  }
573  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
574  IntrinsicType xmm1, xmm2, xmm3;
575  for( size_t i=0UL; i<M; ++i ) {
576  const IntrinsicType x1( set( x[i] ) );
577  xmm1 = xmm1 + x1 * A.load(i,j );
578  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
579  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
580  }
581  y.store( j , xmm1 );
582  y.store( j+IT::size , xmm2 );
583  y.store( j+IT::size*2UL, xmm3 );
584  }
585  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
586  IntrinsicType xmm1, xmm2;
587  for( size_t i=0UL; i<M; ++i ) {
588  const IntrinsicType x1( set( x[i] ) );
589  xmm1 = xmm1 + x1 * A.load(i,j );
590  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
591  }
592  y.store( j , xmm1 );
593  y.store( j+IT::size, xmm2 );
594  }
595  if( j < N ) {
596  IntrinsicType xmm1;
597  for( size_t i=0UL; i<M; ++i ) {
598  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
599  }
600  y.store( j, xmm1 );
601  }
602  }
604  //**********************************************************************************************
605 
606  //**BLAS-based assignment to dense vectors (default)********************************************
620  template< typename VT1 // Type of the left-hand side target vector
621  , typename VT2 // Type of the left-hand side vector operand
622  , typename MT1 > // Type of the right-hand side matrix operand
623  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
624  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
625  {
626  selectDefaultAssignKernel( y, x, A );
627  }
629  //**********************************************************************************************
630 
631  //**BLAS-based assignment to dense vectors (single precision)***********************************
632 #if BLAZE_BLAS_MODE
633 
646  template< typename VT1 // Type of the left-hand side target vector
647  , typename VT2 // Type of the left-hand side vector operand
648  , typename MT1 > // Type of the right-hand side matrix operand
649  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
650  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
651  {
652  using boost::numeric_cast;
653 
657 
658  const int M ( numeric_cast<int>( A.rows() ) );
659  const int N ( numeric_cast<int>( A.columns() ) );
660  const int lda( numeric_cast<int>( A.spacing() ) );
661 
662  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
663  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
664  }
666 #endif
667  //**********************************************************************************************
668 
669  //**BLAS-based assignment to dense vectors (double precision)***********************************
670 #if BLAZE_BLAS_MODE
671 
684  template< typename VT1 // Type of the left-hand side target vector
685  , typename VT2 // Type of the left-hand side vector operand
686  , typename MT1 > // Type of the right-hand side matrix operand
687  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
688  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
689  {
690  using boost::numeric_cast;
691 
695 
696  const int M ( numeric_cast<int>( A.rows() ) );
697  const int N ( numeric_cast<int>( A.columns() ) );
698  const int lda( numeric_cast<int>( A.spacing() ) );
699 
700  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
701  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
702  }
704 #endif
705  //**********************************************************************************************
706 
707  //**BLAS-based assignment to dense vectors (single precision complex)***************************
708 #if BLAZE_BLAS_MODE
709 
722  template< typename VT1 // Type of the left-hand side target vector
723  , typename VT2 // Type of the left-hand side vector operand
724  , typename MT1 > // Type of the right-hand side matrix operand
725  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
726  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
727  {
728  using boost::numeric_cast;
729 
733  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
734  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
735  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
736 
737  const int M ( numeric_cast<int>( A.rows() ) );
738  const int N ( numeric_cast<int>( A.columns() ) );
739  const int lda( numeric_cast<int>( A.spacing() ) );
740  const complex<float> alpha( 1.0F, 0.0F );
741  const complex<float> beta ( 0.0F, 0.0F );
742 
743  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
744  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
745  }
747 #endif
748  //**********************************************************************************************
749 
750  //**BLAS-based assignment to dense vectors (double precision complex)***************************
751 #if BLAZE_BLAS_MODE
752 
765  template< typename VT1 // Type of the left-hand side target vector
766  , typename VT2 // Type of the left-hand side vector operand
767  , typename MT1 > // Type of the right-hand side matrix operand
768  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
769  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
770  {
771  using boost::numeric_cast;
772 
776  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
777  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
778  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
779 
780  const int M ( numeric_cast<int>( A.rows() ) );
781  const int N ( numeric_cast<int>( A.columns() ) );
782  const int lda( numeric_cast<int>( A.spacing() ) );
783  const complex<double> alpha( 1.0, 0.0 );
784  const complex<double> beta ( 0.0, 0.0 );
785 
786  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
787  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
788  }
790 #endif
791  //**********************************************************************************************
792 
793  //**Assignment to sparse vectors****************************************************************
806  template< typename VT1 > // Type of the target sparse vector
807  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
808  {
810 
814 
815  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
816 
817  const ResultType tmp( serial( rhs ) );
818  assign( ~lhs, tmp );
819  }
821  //**********************************************************************************************
822 
823  //**Addition assignment to dense vectors********************************************************
836  template< typename VT1 > // Type of the target dense vector
837  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
838  {
840 
841  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
842 
843  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
844  return;
845  }
846 
847  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
848  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
849 
850  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
851  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
852  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
853  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
854 
855  TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
856  }
858  //**********************************************************************************************
859 
860  //**Addition assignment to dense vectors (kernel selection)*************************************
871  template< typename VT1 // Type of the left-hand side target vector
872  , typename VT2 // Type of the left-hand side vector operand
873  , typename MT1 > // Type of the right-hand side matrix operand
874  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
875  {
876  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
877  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
878  TDVecDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
879  else
880  TDVecDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
881  }
883  //**********************************************************************************************
884 
885  //**Default addition assignment to dense vectors************************************************
899  template< typename VT1 // Type of the left-hand side target vector
900  , typename VT2 // Type of the left-hand side vector operand
901  , typename MT1 > // Type of the right-hand side matrix operand
902  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
903  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
904  {
905  const size_t M( A.rows() );
906  const size_t N( A.columns() );
907 
908  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
909  const size_t jend( N & size_t(-2) );
910 
911  for( size_t i=0UL; i<M; ++i ) {
912  for( size_t j=0UL; j<jend; j+=2UL ) {
913  y[j ] += x[i] * A(i,j );
914  y[j+1UL] += x[i] * A(i,j+1UL);
915  }
916  if( jend < N ) {
917  y[jend] += x[i] * A(i,jend);
918  }
919  }
920  }
922  //**********************************************************************************************
923 
924  //**Vectorized default addition assignment to dense vectors*************************************
938  template< typename VT1 // Type of the left-hand side target vector
939  , typename VT2 // Type of the left-hand side vector operand
940  , typename MT1 > // Type of the right-hand side matrix operand
941  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
942  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
943  {
944  typedef IntrinsicTrait<ElementType> IT;
945 
946  const size_t M( A.rows() );
947  const size_t N( A.columns() );
948 
949  size_t j( 0UL );
950 
951  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
952  IntrinsicType xmm1( y.load(j ) );
953  IntrinsicType xmm2( y.load(j+IT::size ) );
954  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
955  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
956  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
957  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
958  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
959  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
960  for( size_t i=0UL; i<M; ++i ) {
961  const IntrinsicType x1( set( x[i] ) );
962  xmm1 = xmm1 + x1 * A.load(i,j );
963  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
964  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
965  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
966  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
967  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
968  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
969  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
970  }
971  y.store( j , xmm1 );
972  y.store( j+IT::size , xmm2 );
973  y.store( j+IT::size*2UL, xmm3 );
974  y.store( j+IT::size*3UL, xmm4 );
975  y.store( j+IT::size*4UL, xmm5 );
976  y.store( j+IT::size*5UL, xmm6 );
977  y.store( j+IT::size*6UL, xmm7 );
978  y.store( j+IT::size*7UL, xmm8 );
979  }
980  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
981  IntrinsicType xmm1( y.load(j ) );
982  IntrinsicType xmm2( y.load(j+IT::size ) );
983  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
984  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
985  for( size_t i=0UL; i<M; ++i ) {
986  const IntrinsicType x1( set( x[i] ) );
987  xmm1 = xmm1 + x1 * A.load(i,j );
988  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
989  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
990  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
991  }
992  y.store( j , xmm1 );
993  y.store( j+IT::size , xmm2 );
994  y.store( j+IT::size*2UL, xmm3 );
995  y.store( j+IT::size*3UL, xmm4 );
996  }
997  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
998  IntrinsicType xmm1( y.load(j ) );
999  IntrinsicType xmm2( y.load(j+IT::size ) );
1000  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1001  for( size_t i=0UL; i<M; ++i ) {
1002  const IntrinsicType x1( set( x[i] ) );
1003  xmm1 = xmm1 + x1 * A.load(i,j );
1004  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1005  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1006  }
1007  y.store( j , xmm1 );
1008  y.store( j+IT::size , xmm2 );
1009  y.store( j+IT::size*2UL, xmm3 );
1010  }
1011  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1012  IntrinsicType xmm1( y.load(j ) );
1013  IntrinsicType xmm2( y.load(j+IT::size) );
1014  for( size_t i=0UL; i<M; ++i ) {
1015  const IntrinsicType x1( set( x[i] ) );
1016  xmm1 = xmm1 + x1 * A.load(i,j );
1017  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1018  }
1019  y.store( j , xmm1 );
1020  y.store( j+IT::size, xmm2 );
1021  }
1022  if( j < N ) {
1023  IntrinsicType xmm1( y.load(j) );
1024  for( size_t i=0UL; i<M; ++i ) {
1025  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1026  }
1027  y.store( j, xmm1 );
1028  }
1029  }
1031  //**********************************************************************************************
1032 
1033  //**BLAS-based addition assignment to dense vectors (default)***********************************
1047  template< typename VT1 // Type of the left-hand side target vector
1048  , typename VT2 // Type of the left-hand side vector operand
1049  , typename MT1 > // Type of the right-hand side matrix operand
1050  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1051  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1052  {
1053  selectDefaultAddAssignKernel( y, x, A );
1054  }
1056  //**********************************************************************************************
1057 
1058  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1059 #if BLAZE_BLAS_MODE
1060 
1073  template< typename VT1 // Type of the left-hand side target vector
1074  , typename VT2 // Type of the left-hand side vector operand
1075  , typename MT1 > // Type of the right-hand side matrix operand
1076  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1077  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1078  {
1079  using boost::numeric_cast;
1080 
1084 
1085  const int M ( numeric_cast<int>( A.rows() ) );
1086  const int N ( numeric_cast<int>( A.columns() ) );
1087  const int lda( numeric_cast<int>( A.spacing() ) );
1088 
1089  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
1090  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1091  }
1093 #endif
1094  //**********************************************************************************************
1095 
1096  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1097 #if BLAZE_BLAS_MODE
1098 
/*!\brief BLAS-based addition assignment kernel for double precision operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseDoublePrecisionKernel<VT1,VT2,MT1> holds. The raw buffers of
// \a y, \a x and \a A are handed to cblas_dgemv() with alpha = 1 and beta = 1.
*/
1111  template< typename VT1 // Type of the left-hand side target vector
1112  , typename VT2 // Type of the left-hand side vector operand
1113  , typename MT1 > // Type of the right-hand side matrix operand
1114  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1115  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1116  {
1117  using boost::numeric_cast;
1118 
      // NOTE(review): the embedded numbering jumps from 1118 to 1122; the listing appears to
      // have dropped lines here - confirm against the upstream header.
1122 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1123  const int M ( numeric_cast<int>( A.rows() ) );
1124  const int N ( numeric_cast<int>( A.columns() ) );
1125  const int lda( numeric_cast<int>( A.spacing() ) );
1126 
      // Row-major A with CblasTrans, alpha = 1, beta = 1: per the CBLAS gemv contract this
      // requests y := A^T * x + y.
1127  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1128  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1129  }
1131 #endif
1132  //**********************************************************************************************
1133 
1134  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1135 #if BLAZE_BLAS_MODE
1136 
/*!\brief BLAS-based addition assignment kernel for single precision complex operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds. The raw
// buffers of \a y, \a x and \a A are handed to cblas_cgemv() with alpha = (1,0) and
// beta = (1,0), both passed by address.
*/
1149  template< typename VT1 // Type of the left-hand side target vector
1150  , typename VT2 // Type of the left-hand side vector operand
1151  , typename MT1 > // Type of the right-hand side matrix operand
1152  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1153  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1154  {
1155  using boost::numeric_cast;
1156 
      // Compile time checks on the underlying value type of each complex element type.
      // NOTE(review): the embedded numbering skips 1157-1159 - confirm against upstream.
1160  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1161  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1162  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1163 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1164  const int M ( numeric_cast<int>( A.rows() ) );
1165  const int N ( numeric_cast<int>( A.columns() ) );
1166  const int lda( numeric_cast<int>( A.spacing() ) );
      // Complex scaling factors: alpha = 1 scales the product, beta = 1 keeps the old y.
1167  const complex<float> alpha( 1.0F, 0.0F );
1168  const complex<float> beta ( 1.0F, 0.0F );
1169 
      // Row-major A with CblasTrans: per the CBLAS gemv contract this requests
      // y := alpha * A^T * x + beta * y.
1170  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1171  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1172  }
1174 #endif
1175  //**********************************************************************************************
1176 
1177  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1178 #if BLAZE_BLAS_MODE
1179 
/*!\brief BLAS-based addition assignment kernel for double precision complex operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds. The raw
// buffers of \a y, \a x and \a A are handed to cblas_zgemv() with alpha = (1,0) and
// beta = (1,0), both passed by address.
*/
1192  template< typename VT1 // Type of the left-hand side target vector
1193  , typename VT2 // Type of the left-hand side vector operand
1194  , typename MT1 > // Type of the right-hand side matrix operand
1195  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1196  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1197  {
1198  using boost::numeric_cast;
1199 
      // Compile time checks on the underlying value type of each complex element type.
      // NOTE(review): the embedded numbering skips 1200-1202 - confirm against upstream.
1203  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1204  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1205  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1206 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1207  const int M ( numeric_cast<int>( A.rows() ) );
1208  const int N ( numeric_cast<int>( A.columns() ) );
1209  const int lda( numeric_cast<int>( A.spacing() ) );
      // Complex scaling factors: alpha = 1 scales the product, beta = 1 keeps the old y.
1210  const complex<double> alpha( 1.0, 0.0 );
1211  const complex<double> beta ( 1.0, 0.0 );
1212 
      // Row-major A with CblasTrans: per the CBLAS gemv contract this requests
      // y := alpha * A^T * x + beta * y.
1213  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1214  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1215  }
1217 #endif
1218  //**********************************************************************************************
1219 
1220  //**Addition assignment to sparse vectors*******************************************************
1221  // No special implementation for the addition assignment to sparse vectors.
1222  //**********************************************************************************************
1223 
1224  //**Subtraction assignment to dense vectors*****************************************************
/*!\brief Subtraction assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Returns without touching \a lhs when the matrix operand has no rows or no columns.
// Otherwise both operands are evaluated serially into the operand types LT and RT (declared
// outside this excerpt) and the work is passed on to selectSubAssignKernel().
*/
1237  template< typename VT1 > // Type of the target dense vector
1238  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1239  {
      // NOTE(review): the embedded numbering skips 1240; a statement appears to have been
      // dropped by the listing - confirm against the upstream header.
1241 
1242  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1243 
      // An empty matrix contributes nothing to the difference.
1244  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1245  return;
1246  }
1247 
1248  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1249  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1250 
      // The evaluated operands must keep the dimensions of the original operands.
1251  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1252  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1253  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1254  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1255 
1256  TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1257  }
1259  //**********************************************************************************************
1260 
1261  //**Subtraction assignment to dense vectors (kernel selection)**********************************
/*!\brief Selection of the kernel for a subtraction assignment to a dense vector.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// The default kernel is chosen when the matrix type MT is a computation that is not
// evaluated (evaluateMatrix is false), or when the number of matrix elements
// (rows * columns) is below TDVECDMATMULT_THRESHOLD. In all other cases the BLAS kernel
// overload set is used.
*/
1272  template< typename VT1 // Type of the left-hand side target vector
1273  , typename VT2 // Type of the left-hand side vector operand
1274  , typename MT1 > // Type of the right-hand side matrix operand
1275  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1276  {
1277  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1278  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1279  TDVecDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1280  else
1281  TDVecDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
1282  }
1284  //**********************************************************************************************
1285 
1286  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default (non-vectorized) subtraction assignment kernel.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Used when UseVectorizedDefaultKernel<VT1,VT2,MT1> does not apply (DisableIf). For every
// row i of \a A, the product x[i] * A(i,j) is subtracted from y[j] for all columns j. The
// column loop is unrolled by two, with a separate step for an odd final column.
*/
1300  template< typename VT1 // Type of the left-hand side target vector
1301  , typename VT2 // Type of the left-hand side vector operand
1302  , typename MT1 > // Type of the right-hand side matrix operand
1303  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1304  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1305  {
1306  const size_t M( A.rows() );
1307  const size_t N( A.columns() );
1308 
      // jend is N rounded down to an even value: the end of the two-way unrolled range.
1309  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1310  const size_t jend( N & size_t(-2) );
1311 
1312  for( size_t i=0UL; i<M; ++i ) {
1313  for( size_t j=0UL; j<jend; j+=2UL ) {
1314  y[j ] -= x[i] * A(i,j );
1315  y[j+1UL] -= x[i] * A(i,j+1UL);
1316  }
      // Remaining last column when N is odd.
1317  if( jend < N ) {
1318  y[jend] -= x[i] * A(i,jend);
1319  }
1320  }
1321  }
1323  //**********************************************************************************************
1324 
1325  //**Vectorized default subtraction assignment to dense vectors**********************************
/*!\brief Vectorized default subtraction assignment kernel.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds. The columns are
// processed in blocks of 8, 4, 3, 2 and finally 1 IntrinsicType values. For each block the
// current values of \a y are loaded into accumulators, x[i] * A(i,block) is subtracted for
// every row i, and the accumulators are stored back to \a y.
// NOTE(review): IT::size is presumably the number of elements per IntrinsicType - confirm.
*/
1339  template< typename VT1 // Type of the left-hand side target vector
1340  , typename VT2 // Type of the left-hand side vector operand
1341  , typename MT1 > // Type of the right-hand side matrix operand
1342  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1343  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1344  {
1345  typedef IntrinsicTrait<ElementType> IT;
1346 
1347  const size_t M( A.rows() );
1348  const size_t N( A.columns() );
1349 
      // Column index shared by all block loops below; each loop resumes where the
      // previous one stopped.
1350  size_t j( 0UL );
1351 
      // Blocks of eight IntrinsicType values.
1352  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1353  IntrinsicType xmm1( y.load(j ) );
1354  IntrinsicType xmm2( y.load(j+IT::size ) );
1355  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1356  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1357  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1358  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1359  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1360  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1361  for( size_t i=0UL; i<M; ++i ) {
      // x[i] is expanded once per row via set() and reused for all eight column chunks.
1362  const IntrinsicType x1( set( x[i] ) );
1363  xmm1 = xmm1 - x1 * A.load(i,j );
1364  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1365  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1366  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1367  xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1368  xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1369  xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1370  xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1371  }
1372  y.store( j , xmm1 );
1373  y.store( j+IT::size , xmm2 );
1374  y.store( j+IT::size*2UL, xmm3 );
1375  y.store( j+IT::size*3UL, xmm4 );
1376  y.store( j+IT::size*4UL, xmm5 );
1377  y.store( j+IT::size*5UL, xmm6 );
1378  y.store( j+IT::size*6UL, xmm7 );
1379  y.store( j+IT::size*7UL, xmm8 );
1380  }
      // Blocks of four IntrinsicType values.
1381  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1382  IntrinsicType xmm1( y.load(j ) );
1383  IntrinsicType xmm2( y.load(j+IT::size ) );
1384  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1385  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1386  for( size_t i=0UL; i<M; ++i ) {
1387  const IntrinsicType x1( set( x[i] ) );
1388  xmm1 = xmm1 - x1 * A.load(i,j );
1389  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1390  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1391  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1392  }
1393  y.store( j , xmm1 );
1394  y.store( j+IT::size , xmm2 );
1395  y.store( j+IT::size*2UL, xmm3 );
1396  y.store( j+IT::size*3UL, xmm4 );
1397  }
      // Blocks of three IntrinsicType values.
1398  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1399  IntrinsicType xmm1( y.load(j ) );
1400  IntrinsicType xmm2( y.load(j+IT::size ) );
1401  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1402  for( size_t i=0UL; i<M; ++i ) {
1403  const IntrinsicType x1( set( x[i] ) );
1404  xmm1 = xmm1 - x1 * A.load(i,j );
1405  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1406  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1407  }
1408  y.store( j , xmm1 );
1409  y.store( j+IT::size , xmm2 );
1410  y.store( j+IT::size*2UL, xmm3 );
1411  }
      // Blocks of two IntrinsicType values.
1412  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1413  IntrinsicType xmm1( y.load(j ) );
1414  IntrinsicType xmm2( y.load(j+IT::size) );
1415  for( size_t i=0UL; i<M; ++i ) {
1416  const IntrinsicType x1( set( x[i] ) );
1417  xmm1 = xmm1 - x1 * A.load(i,j );
1418  xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
1419  }
1420  y.store( j , xmm1 );
1421  y.store( j+IT::size, xmm2 );
1422  }
      // Final single IntrinsicType value. The load/store is full width even if fewer than
      // IT::size columns remain.
      // NOTE(review): presumably relies on padded storage of y and A - confirm.
1423  if( j < N ) {
1424  IntrinsicType xmm1( y.load(j) );
1425  for( size_t i=0UL; i<M; ++i ) {
1426  xmm1 = xmm1 - set( x[i] ) * A.load(i,j);
1427  }
1428  y.store( j, xmm1 );
1429  }
1430  }
1432  //**********************************************************************************************
1433 
1434  //**BLAS-based subtraction assignment to dense vectors (default)********************************
/*!\brief Fallback overload of the BLAS-based subtraction assignment kernel.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload participates in overload resolution only when
// UseDefaultKernel<VT1,VT2,MT1> enables it (via EnableIf). It performs no BLAS call itself
// and simply forwards all three arguments to selectDefaultSubAssignKernel().
*/
1448  template< typename VT1 // Type of the left-hand side target vector
1449  , typename VT2 // Type of the left-hand side vector operand
1450  , typename MT1 > // Type of the right-hand side matrix operand
1451  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1452  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1453  {
1454  selectDefaultSubAssignKernel( y, x, A );
1455  }
1457  //**********************************************************************************************
1458 
1459  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1460 #if BLAZE_BLAS_MODE
1461 
/*!\brief BLAS-based subtraction assignment kernel for single precision operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseSinglePrecisionKernel<VT1,VT2,MT1> holds. The raw buffers of
// \a y, \a x and \a A are handed to cblas_sgemv() with alpha = -1 and beta = 1.
*/
1474  template< typename VT1 // Type of the left-hand side target vector
1475  , typename VT2 // Type of the left-hand side vector operand
1476  , typename MT1 > // Type of the right-hand side matrix operand
1477  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1478  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1479  {
1480  using boost::numeric_cast;
1481 
      // NOTE(review): the embedded numbering jumps from 1481 to 1485; the listing appears to
      // have dropped lines here - confirm against the upstream header.
1485 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1486  const int M ( numeric_cast<int>( A.rows() ) );
1487  const int N ( numeric_cast<int>( A.columns() ) );
1488  const int lda( numeric_cast<int>( A.spacing() ) );
1489 
      // Row-major A with CblasTrans, alpha = -1, beta = 1: per the CBLAS gemv contract this
      // requests y := y - A^T * x.
1490  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1491  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1492  }
1494 #endif
1495  //**********************************************************************************************
1496 
1497  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1498 #if BLAZE_BLAS_MODE
1499 
/*!\brief BLAS-based subtraction assignment kernel for double precision operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseDoublePrecisionKernel<VT1,VT2,MT1> holds. The raw buffers of
// \a y, \a x and \a A are handed to cblas_dgemv() with alpha = -1 and beta = 1.
*/
1512  template< typename VT1 // Type of the left-hand side target vector
1513  , typename VT2 // Type of the left-hand side vector operand
1514  , typename MT1 > // Type of the right-hand side matrix operand
1515  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1516  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1517  {
1518  using boost::numeric_cast;
1519 
      // NOTE(review): the embedded numbering jumps from 1519 to 1523; the listing appears to
      // have dropped lines here - confirm against the upstream header.
1523 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1524  const int M ( numeric_cast<int>( A.rows() ) );
1525  const int N ( numeric_cast<int>( A.columns() ) );
1526  const int lda( numeric_cast<int>( A.spacing() ) );
1527 
      // Row-major A with CblasTrans, alpha = -1, beta = 1: per the CBLAS gemv contract this
      // requests y := y - A^T * x.
1528  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1529  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1530  }
1532 #endif
1533  //**********************************************************************************************
1534 
1535  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1536 #if BLAZE_BLAS_MODE
1537 
/*!\brief BLAS-based subtraction assignment kernel for single precision complex operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds. The raw
// buffers of \a y, \a x and \a A are handed to cblas_cgemv() with alpha = (-1,0) and
// beta = (1,0), both passed by address.
*/
1550  template< typename VT1 // Type of the left-hand side target vector
1551  , typename VT2 // Type of the left-hand side vector operand
1552  , typename MT1 > // Type of the right-hand side matrix operand
1553  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1554  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1555  {
1556  using boost::numeric_cast;
1557 
      // Compile time checks on the underlying value type of each complex element type.
      // NOTE(review): the embedded numbering skips 1558-1560 - confirm against upstream.
1561  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1562  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1563  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1564 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1565  const int M ( numeric_cast<int>( A.rows() ) );
1566  const int N ( numeric_cast<int>( A.columns() ) );
1567  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = -1 negates the product, beta = 1 keeps the old y.
1568  const complex<float> alpha( -1.0F, 0.0F );
1569  const complex<float> beta ( 1.0F, 0.0F );
1570 
      // Row-major A with CblasTrans: per the CBLAS gemv contract this requests
      // y := alpha * A^T * x + beta * y.
1571  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1572  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1573  }
1575 #endif
1576  //**********************************************************************************************
1577 
1578  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1579 #if BLAZE_BLAS_MODE
1580 
/*!\brief BLAS-based subtraction assignment kernel for double precision complex operands.
//
// \param y The target left-hand side dense vector (updated in place).
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Enabled via EnableIf when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds. The raw
// buffers of \a y, \a x and \a A are handed to cblas_zgemv() with alpha = (-1,0) and
// beta = (1,0), both passed by address.
*/
1593  template< typename VT1 // Type of the left-hand side target vector
1594  , typename VT2 // Type of the left-hand side vector operand
1595  , typename MT1 > // Type of the right-hand side matrix operand
1596  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1597  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1598  {
1599  using boost::numeric_cast;
1600 
      // Compile time checks on the underlying value type of each complex element type.
      // NOTE(review): the embedded numbering skips 1601-1603 - confirm against upstream.
1604  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1605  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1606  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1607 
      // CBLAS takes 'int' dimensions; the size_t values go through boost::numeric_cast.
1608  const int M ( numeric_cast<int>( A.rows() ) );
1609  const int N ( numeric_cast<int>( A.columns() ) );
1610  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = -1 negates the product, beta = 1 keeps the old y.
1611  const complex<double> alpha( -1.0, 0.0 );
1612  const complex<double> beta ( 1.0, 0.0 );
1613 
      // Row-major A with CblasTrans: per the CBLAS gemv contract this requests
      // y := alpha * A^T * x + beta * y.
1614  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1615  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1616  }
1618 #endif
1619  //**********************************************************************************************
1620 
1621  //**Subtraction assignment to sparse vectors****************************************************
1622  // No special implementation for the subtraction assignment to sparse vectors.
1623  //**********************************************************************************************
1624 
1625  //**Multiplication assignment to dense vectors**************************************************
/*!\brief Multiplication assignment of a transpose dense vector-dense matrix multiplication to
//        a transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression.
// \return void
//
// The expression is first evaluated serially into a temporary of type ResultType; the
// temporary is then passed to multAssign() together with the target vector.
*/
1638  template< typename VT1 > // Type of the target dense vector
1639  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1640  {
      // NOTE(review): the embedded numbering skips 1641 and 1643-1645; statements appear to
      // have been dropped by the listing - confirm against the upstream header.
1642 
1646 
1647  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1648 
1649  const ResultType tmp( serial( rhs ) );
1650  multAssign( ~lhs, tmp );
1651  }
1653  //**********************************************************************************************
1654 
1655  //**Multiplication assignment to sparse vectors*************************************************
1656  // No special implementation for the multiplication assignment to sparse vectors.
1657  //**********************************************************************************************
1658 
1659  //**SMP assignment to dense vectors*************************************************************
/*!\brief SMP assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be assigned.
// \return void
//
// Enabled via EnableIf when UseSMPAssign<VT1> holds. A matrix without rows resets the
// target; a matrix without columns leaves it untouched. Otherwise both operands are
// evaluated into LT and RT (without serial()) and the product of the evaluated operands is
// handed to smpAssign().
*/
1674  template< typename VT1 > // Type of the target dense vector
1675  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1676  smpAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1677  {
      // NOTE(review): the embedded numbering skips 1678 - confirm against upstream.
1679 
1680  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1681 
      // No rows: the target is reset.
1682  if( rhs.mat_.rows() == 0UL ) {
1683  reset( ~lhs );
1684  return;
1685  }
      // No columns: nothing to assign.
1686  else if( rhs.mat_.columns() == 0UL ) {
1687  return;
1688  }
1689 
1690  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1691  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1692 
1693  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1694  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1695  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1696  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1697 
1698  smpAssign( ~lhs, x * A );
1699  }
1701  //**********************************************************************************************
1702 
1703  //**SMP assignment to sparse vectors************************************************************
/*!\brief SMP assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose sparse vector.
//
// \param lhs The target left-hand side sparse vector.
// \param rhs The right-hand side multiplication expression to be assigned.
// \return void
//
// Enabled via EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into a
// temporary of type ResultType, which is then handed to smpAssign().
*/
1718  template< typename VT1 > // Type of the target sparse vector
1719  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1720  smpAssign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1721  {
      // NOTE(review): the embedded numbering skips 1722 and 1724-1726; statements appear to
      // have been dropped by the listing - confirm against the upstream header.
1723 
1727 
1728  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1729 
1730  const ResultType tmp( rhs );
1731  smpAssign( ~lhs, tmp );
1732  }
1734  //**********************************************************************************************
1735 
1736  //**SMP addition assignment to dense vectors****************************************************
/*!\brief SMP addition assignment of a transpose dense vector-dense matrix multiplication to a
//        transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be added.
// \return void
//
// Enabled via EnableIf when UseSMPAssign<VT1> holds. Returns without touching \a lhs when
// the matrix operand has no rows or no columns. Otherwise both operands are evaluated into
// LT and RT and the product of the evaluated operands is handed to smpAddAssign().
*/
1751  template< typename VT1 > // Type of the target dense vector
1752  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1753  smpAddAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1754  {
      // NOTE(review): the embedded numbering skips 1755 - confirm against upstream.
1756 
1757  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1758 
      // An empty matrix contributes nothing to the sum.
1759  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1760  return;
1761  }
1762 
1763  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1764  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1765 
1766  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1767  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1768  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1769  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1770 
1771  smpAddAssign( ~lhs, x * A );
1772  }
1774  //**********************************************************************************************
1775 
1776  //**SMP addition assignment to sparse vectors***************************************************
1777  // No special implementation for the SMP addition assignment to sparse vectors.
1778  //**********************************************************************************************
1779 
1780  //**SMP subtraction assignment to dense vectors*************************************************
/*!\brief SMP subtraction assignment of a transpose dense vector-dense matrix multiplication
//        to a transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Enabled via EnableIf when UseSMPAssign<VT1> holds. Returns without touching \a lhs when
// the matrix operand has no rows or no columns. Otherwise both operands are evaluated into
// LT and RT and the product of the evaluated operands is handed to smpSubAssign().
*/
1795  template< typename VT1 > // Type of the target dense vector
1796  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1797  smpSubAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1798  {
      // NOTE(review): the embedded numbering skips 1799 - confirm against upstream.
1800 
1801  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1802 
      // An empty matrix contributes nothing to the difference.
1803  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1804  return;
1805  }
1806 
1807  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1808  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1809 
1810  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1811  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1812  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1813  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1814 
1815  smpSubAssign( ~lhs, x * A );
1816  }
1818  //**********************************************************************************************
1819 
1820  //**SMP subtraction assignment to sparse vectors************************************************
1821  // No special implementation for the SMP subtraction assignment to sparse vectors.
1822  //**********************************************************************************************
1823 
1824  //**SMP multiplication assignment to dense vectors**********************************************
/*!\brief SMP multiplication assignment of a transpose dense vector-dense matrix
//        multiplication to a transpose dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression.
// \return void
//
// Enabled via EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into a
// temporary of type ResultType, which is then handed to smpMultAssign().
*/
1839  template< typename VT1 > // Type of the target dense vector
1840  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1841  smpMultAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1842  {
      // NOTE(review): the embedded numbering skips 1843 and 1845-1847; statements appear to
      // have been dropped by the listing - confirm against the upstream header.
1844 
1848 
1849  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1850 
1851  const ResultType tmp( rhs );
1852  smpMultAssign( ~lhs, tmp );
1853  }
1855  //**********************************************************************************************
1856 
1857  //**SMP multiplication assignment to sparse vectors*********************************************
1858  // No special implementation for the SMP multiplication assignment to sparse vectors.
1859  //**********************************************************************************************
1860 
1861  //**Compile time checks*************************************************************************
1868  //**********************************************************************************************
1869 };
1870 //*************************************************************************************************
1871 
1872 
1873 
1874 
1875 //=================================================================================================
1876 //
1877 // DVECSCALARMULTEXPR SPECIALIZATION
1878 //
1879 //=================================================================================================
1880 
1881 //*************************************************************************************************
1889 template< typename VT // Type of the left-hand side dense vector
1890  , typename MT // Type of the right-hand side dense matrix
1891  , typename ST > // Type of the side scalar value
1892 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1893  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1894  , private VecScalarMultExpr
1895  , private Computation
1896 {
1897  private:
1898  //**Type definitions****************************************************************************
// Private shorthand types used throughout this specialization.
1899  typedef TDVecDMatMultExpr<VT,MT> VMM; // Type of the wrapped vector/matrix multiplication expression
1900  typedef typename VMM::ResultType RES; // Result type of the wrapped multiplication expression
1901  typedef typename VT::ResultType VRT; // Result type of the dense vector operand
1902  typedef typename MT::ResultType MRT; // Result type of the dense matrix operand
1903  typedef typename VRT::ElementType VET; // Element type of the vector result type
1904  typedef typename MRT::ElementType MET; // Element type of the matrix result type
1905  typedef typename VT::CompositeType VCT; // Composite type of the dense vector operand
1906  typedef typename MT::CompositeType MCT; // Composite type of the dense matrix operand
1907  //**********************************************************************************************
1908 
1909  //**********************************************************************************************
//! Compile time flag: set when VT is a computation or RequiresEvaluation<VT> holds. It selects
//! the evaluated vector result type for the operand type LT.
1911  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1912  //**********************************************************************************************
1913 
1914  //**********************************************************************************************
//! Compile time flag: set when RequiresEvaluation<MT> holds, or when MT is a computation whose
//! element type equals the vector element type and is BLAS compatible. It selects the
//! evaluated matrix result type for the operand type RT.
1916  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1917  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1918  //**********************************************************************************************
1919 
1920  //**********************************************************************************************
1922 
//! Helper trait gating the SMP assignment overloads: \a value is set when either operand has
//! to be evaluated (evaluateVector or evaluateMatrix). The template parameter T1 is not
//! referenced by \a value.
1925  template< typename T1 >
1926  struct UseSMPAssign {
1927  enum { value = ( evaluateVector || evaluateMatrix ) };
1928  };
1929  //**********************************************************************************************
1930 
1931  //**********************************************************************************************
1933 
//! Helper trait for selecting the single precision BLAS kernel: \a value is set when T1, T2
//! and T3 are all vectorizable, all three have a float element type (IsFloat), and the fourth
//! type T4 is not a complex type.
1936  template< typename T1, typename T2, typename T3, typename T4 >
1937  struct UseSinglePrecisionKernel {
1938  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1939  IsFloat<typename T1::ElementType>::value &&
1940  IsFloat<typename T2::ElementType>::value &&
1941  IsFloat<typename T3::ElementType>::value &&
1942  !IsComplex<T4>::value };
1943  };
1944  //**********************************************************************************************
1945 
1946  //**********************************************************************************************
1948 
//! Helper trait for selecting the double precision BLAS kernel: \a value is set when T1, T2
//! and T3 are all vectorizable, all three have a double element type (IsDouble), and the
//! fourth type T4 is not a complex type.
1951  template< typename T1, typename T2, typename T3, typename T4 >
1952  struct UseDoublePrecisionKernel {
1953  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1954  IsDouble<typename T1::ElementType>::value &&
1955  IsDouble<typename T2::ElementType>::value &&
1956  IsDouble<typename T3::ElementType>::value &&
1957  !IsComplex<T4>::value };
1958  };
1959  //**********************************************************************************************
1960 
1961  //**********************************************************************************************
1963 
//! Helper trait for selecting the single precision complex BLAS kernel: \a value is set when
//! T1, T2 and T3 are all vectorizable and each has exactly complex<float> as element type.
1966  template< typename T1, typename T2, typename T3 >
1967  struct UseSinglePrecisionComplexKernel {
1968  typedef complex<float> Type;
1969  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1970  IsSame<typename T1::ElementType,Type>::value &&
1971  IsSame<typename T2::ElementType,Type>::value &&
1972  IsSame<typename T3::ElementType,Type>::value };
1973  };
1974  //**********************************************************************************************
1975 
1976  //**********************************************************************************************
1978 
//! Helper trait for selecting the double precision complex BLAS kernel: \a value is set when
//! T1, T2 and T3 are all vectorizable and each has exactly complex<double> as element type.
1981  template< typename T1, typename T2, typename T3 >
1982  struct UseDoublePrecisionComplexKernel {
1983  typedef complex<double> Type;
1984  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1985  IsSame<typename T1::ElementType,Type>::value &&
1986  IsSame<typename T2::ElementType,Type>::value &&
1987  IsSame<typename T3::ElementType,Type>::value };
1988  };
1989  //**********************************************************************************************
1990 
1991  //**********************************************************************************************
1993 
//! Helper trait for selecting the default kernel: \a value is set when BLAZE_BLAS_MODE is
//! disabled, or when none of the four BLAS kernel traits (single, double, single complex,
//! double complex) applies to the given types.
1995  template< typename T1, typename T2, typename T3, typename T4 >
1996  struct UseDefaultKernel {
1997  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1998  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1999  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2000  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2001  };
2002  //**********************************************************************************************
2003 
2004  //**********************************************************************************************
2006 
//! Helper trait for selecting the vectorized default kernel: \a value is set when T1, T2 and
//! T3 are all vectorizable, the element types of T2 and T3 as well as the type T4 are the
//! same as the element type of T1, and IntrinsicTrait reports both addition and
//! multiplication for that element type.
2009  template< typename T1, typename T2, typename T3, typename T4 >
2010  struct UseVectorizedDefaultKernel {
2011  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2012  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2013  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2014  IsSame<typename T1::ElementType,T4>::value &&
2015  IntrinsicTrait<typename T1::ElementType>::addition &&
2016  IntrinsicTrait<typename T1::ElementType>::multiplication };
2017  };
2018  //**********************************************************************************************
2019 
2020  public:
2021  //**Type definitions****************************************************************************
// Public type definitions of the scaled vector/matrix multiplication expression.
2022  typedef DVecScalarMultExpr<VMM,ST,true> This; // Type of this expression specialization
2023  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type: MultTrait of RES and the scalar type
2024  typedef typename ResultType::TransposeType TransposeType; // Transpose type taken from ResultType
2025  typedef typename ResultType::ElementType ElementType; // Element type taken from ResultType
2026  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type for ElementType
2027  typedef const ElementType ReturnType; // Return type of the subscript operator
2028  typedef const ResultType CompositeType; // Composite type: the (const) result type
2029 
      //! Type of the left-hand side operand: the wrapped vector/matrix multiplication.
2031  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
2032 
      //! Type of the right-hand side operand: the scalar.
2034  typedef ST RightOperand;
2035 
      //! Vector operand type used inside the kernels: the evaluated result type (const VRT)
      //! when evaluateVector is set, the composite type VCT otherwise.
2037  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
2038 
      //! Matrix operand type used inside the kernels: the evaluated result type (const MRT)
      //! when evaluateMatrix is set, the composite type MCT otherwise.
2040  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
2041  //**********************************************************************************************
2042 
2043  //**Compilation flags***************************************************************************
//! Compilation flag for intrinsic optimization: set when both operand types are vectorizable,
//! the vector element type, matrix element type and scalar type are all the same, and
//! IntrinsicTrait reports addition and multiplication for that element type.
2045  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
2046  IsSame<VET,MET>::value &&
2047  IsSame<VET,ST>::value &&
2048  IntrinsicTrait<VET>::addition &&
2049  IntrinsicTrait<VET>::multiplication };
2050 
      //! Compilation flag for SMP assignments: set when neither operand requires an
      //! evaluation and both operand types are SMP-assignable.
2052  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2053  !evaluateMatrix && MT::smpAssignable };
2054  //**********************************************************************************************
2055 
2056  //**Constructor*********************************************************************************
/*!\brief Constructor for the scaled vector/matrix multiplication expression.
//
// \param vector The left-hand side vector/matrix multiplication expression.
// \param scalar The right-hand side scalar of the multiplication expression.
//
// Both arguments are stored in the members vector_ and scalar_.
*/
2062  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
2063  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2064  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2065  {}
2066  //**********************************************************************************************
2067 
2068  //**Subscript operator**************************************************************************
/*!\brief Subscript operator for direct access to a single element of the expression.
//
// \param index Access index; asserted (BLAZE_INTERNAL_ASSERT) to be smaller than the size of
//              the wrapped expression.
// \return The element of the wrapped expression at \a index multiplied by the scalar.
*/
2074  inline ReturnType operator[]( size_t index ) const {
2075  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2076  return vector_[index] * scalar_;
2077  }
2078  //**********************************************************************************************
2079 
2080  //**Size function*******************************************************************************
/*!\brief Returns the current size of the vector.
//
// \return The size reported by the wrapped vector/matrix multiplication expression.
*/
2085  inline size_t size() const {
2086  return vector_.size();
2087  }
2088  //**********************************************************************************************
2089 
2090  //**Left operand access*************************************************************************
/*!\brief Returns the left-hand side operand of the scaling expression.
//
// \return The stored vector/matrix multiplication expression (by value, as LeftOperand).
*/
2095  inline LeftOperand leftOperand() const {
2096  return vector_;
2097  }
2098  //**********************************************************************************************
2099 
2100  //**Right operand access************************************************************************
/*!\brief Returns the right-hand side operand of the scaling expression.
//
// \return The stored scalar.
*/
2105  inline RightOperand rightOperand() const {
2106  return scalar_;
2107  }
2108  //**********************************************************************************************
2109 
2110  //**********************************************************************************************
2116  template< typename T >
2117  inline bool canAlias( const T* alias ) const {
2118  return vector_.canAlias( alias );
2119  }
2120  //**********************************************************************************************
2121 
2122  //**********************************************************************************************
2128  template< typename T >
2129  inline bool isAliased( const T* alias ) const {
2130  return vector_.isAliased( alias );
2131  }
2132  //**********************************************************************************************
2133 
2134  //**********************************************************************************************
2139  inline bool isAligned() const {
2140  return vector_.isAligned();
2141  }
2142  //**********************************************************************************************
2143 
2144  //**********************************************************************************************
2149  inline bool canSMPAssign() const {
2150  typename VMM::RightOperand A( vector_.rightOperand() );
2151  return ( !BLAZE_BLAS_IS_PARALLEL ||
2152  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2153  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
2155  }
2156  //**********************************************************************************************
2157 
2158  private:
2159  //**Member variables****************************************************************************
2160  LeftOperand vector_; // Left-hand side operand: the vector/matrix multiplication expression being scaled
2161  RightOperand scalar_; // Right-hand side operand: the scalar factor
2162  //**********************************************************************************************
2163 
2164  //**Assignment to dense vectors*****************************************************************
// Assignment of the scaled vector/matrix product 'rhs' to the dense target vector 'lhs'.
// The two operands of the wrapped multiplication are extracted, evaluated through serial(),
// and handed to selectAssignKernel() together with the scalar.
// NOTE(review): listing line 2179 is absent from this extract (numbering jumps 2178 -> 2180);
// presumably a statement was dropped by the extraction - confirm against the original header.
2176  template< typename VT1 > // Type of the target dense vector
2177  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2178  {
2180 
2181  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2182 
2183  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2184  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2185 
      // Matrix without rows: there is nothing to accumulate, so the target is reset.
2186  if( right.rows() == 0UL ) {
2187  reset( ~lhs );
2188  return;
2189  }
      // Matrix without columns: the result has no elements, so nothing is written.
2190  else if( right.columns() == 0UL ) {
2191  return;
2192  }
2193 
2194  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
2195  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2196 
2197  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2198  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2199  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2200  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2201 
2202  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
2203  }
2204  //**********************************************************************************************
2205 
2206  //**Assignment to dense vectors (kernel selection)**********************************************
2217  template< typename VT1 // Type of the left-hand side target vector
2218  , typename VT2 // Type of the left-hand side vector operand
2219  , typename MT1 // Type of the right-hand side matrix operand
2220  , typename ST2 > // Type of the scalar value
2221  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2222  {
2223  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2224  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2225  DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2226  else
2227  DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
2228  }
2229  //**********************************************************************************************
2230 
2231  //**Default assignment to dense vectors*********************************************************
2245  template< typename VT1 // Type of the left-hand side target vector
2246  , typename VT2 // Type of the left-hand side vector operand
2247  , typename MT1 // Type of the right-hand side matrix operand
2248  , typename ST2 > // Type of the scalar value
2249  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2250  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2251  {
2252  const size_t M( A.rows() );
2253  const size_t N( A.columns() );
2254 
2255  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
2256  const size_t jend( N & size_t(-2) );
2257 
2258  for( size_t j=0UL; j<N; ++j ) {
2259  y[j] = x[0UL] * A(0UL,j);
2260  }
2261  for( size_t i=1UL; i<M; ++i ) {
2262  for( size_t j=0UL; j<jend; j+=2UL ) {
2263  y[j ] += x[i] * A(i,j );
2264  y[j+1UL] += x[i] * A(i,j+1UL);
2265  }
2266  if( jend < N ) {
2267  y[jend] += x[i] * A(i,jend);
2268  }
2269  }
2270  for( size_t j=0UL; j<N; ++j ) {
2271  y[j] *= scalar;
2272  }
2273  }
2274  //**********************************************************************************************
2275 
2276  //**Default assignment to dense vectors*********************************************************
// Vectorized default kernel for the assignment y = ( x * A ) * scalar.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the products x[i] * A(i,j..) are accumulated over all
// rows i, then multiplied by the broadcast scalar ('factor') and stored to y.
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed before being summed
// into - presumably IntrinsicType's default constructor zero-initializes; confirm.
// NOTE(review): the last block always loads/stores a full intrinsic vector even if fewer than
// IT::size columns remain - presumably the storage of A and y is padded; confirm.
2290  template< typename VT1 // Type of the left-hand side target vector
2291  , typename VT2 // Type of the left-hand side vector operand
2292  , typename MT1 // Type of the right-hand side matrix operand
2293  , typename ST2 > // Type of the scalar value
2294  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2295  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2296  {
2297  typedef IntrinsicTrait<ElementType> IT;
2298 
2299  const size_t M( A.rows() );
2300  const size_t N( A.columns() );
2301 
2302  const IntrinsicType factor( set( scalar ) );
2303 
2304  size_t j( 0UL );
2305 
      // Blocks of eight intrinsic vectors
2306  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2307  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2308  for( size_t i=0UL; i<M; ++i ) {
2309  const IntrinsicType x1( set( x[i] ) );
2310  xmm1 = xmm1 + x1 * A.load(i,j );
2311  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2312  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2313  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2314  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2315  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2316  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2317  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2318  }
2319  y.store( j , xmm1*factor );
2320  y.store( j+IT::size , xmm2*factor );
2321  y.store( j+IT::size*2UL, xmm3*factor );
2322  y.store( j+IT::size*3UL, xmm4*factor );
2323  y.store( j+IT::size*4UL, xmm5*factor );
2324  y.store( j+IT::size*5UL, xmm6*factor );
2325  y.store( j+IT::size*6UL, xmm7*factor );
2326  y.store( j+IT::size*7UL, xmm8*factor );
2327  }
      // Blocks of four intrinsic vectors
2328  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2329  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2330  for( size_t i=0UL; i<M; ++i ) {
2331  const IntrinsicType x1( set( x[i] ) );
2332  xmm1 = xmm1 + x1 * A.load(i,j );
2333  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2334  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2335  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2336  }
2337  y.store( j , xmm1*factor );
2338  y.store( j+IT::size , xmm2*factor );
2339  y.store( j+IT::size*2UL, xmm3*factor );
2340  y.store( j+IT::size*3UL, xmm4*factor );
2341  }
      // Blocks of three intrinsic vectors
2342  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2343  IntrinsicType xmm1, xmm2, xmm3;
2344  for( size_t i=0UL; i<M; ++i ) {
2345  const IntrinsicType x1( set( x[i] ) );
2346  xmm1 = xmm1 + x1 * A.load(i,j );
2347  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2348  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2349  }
2350  y.store( j , xmm1*factor );
2351  y.store( j+IT::size , xmm2*factor );
2352  y.store( j+IT::size*2UL, xmm3*factor );
2353  }
      // Blocks of two intrinsic vectors
2354  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2355  IntrinsicType xmm1, xmm2;
2356  for( size_t i=0UL; i<M; ++i ) {
2357  const IntrinsicType x1( set( x[i] ) );
2358  xmm1 = xmm1 + x1 * A.load(i,j );
2359  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2360  }
2361  y.store( j , xmm1*factor );
2362  y.store( j+IT::size, xmm2*factor );
2363  }
      // Remaining columns (at most one intrinsic vector)
2364  if( j < N ) {
2365  IntrinsicType xmm1;
2366  for( size_t i=0UL; i<M; ++i ) {
2367  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2368  }
2369  y.store( j, xmm1*factor );
2370  }
2371  }
2372  //**********************************************************************************************
2373 
2374  //**BLAS-based assignment to dense vectors (default)********************************************
2387  template< typename VT1 // Type of the left-hand side target vector
2388  , typename VT2 // Type of the left-hand side vector operand
2389  , typename MT1 // Type of the right-hand side matrix operand
2390  , typename ST2 > // Type of the scalar value
2391  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2392  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2393  {
2394  selectDefaultAssignKernel( y, x, A, scalar );
2395  }
2396  //**********************************************************************************************
2397 
2398  //**BLAS-based assignment to dense vectors (single precision)***********************************
2399 #if BLAZE_BLAS_MODE
2400 
// BLAS kernel (single precision) for the assignment y = ( x * A ) * scalar.
// Calls cblas_sgemv on the row-major matrix A with CblasTrans, alpha = scalar and beta = 0,
// so y is overwritten with the scaled product.
// NOTE(review): listing lines 2422-2424 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2413  template< typename VT1 // Type of the left-hand side target vector
2414  , typename VT2 // Type of the left-hand side vector operand
2415  , typename MT1 // Type of the right-hand side matrix operand
2416  , typename ST2 > // Type of the scalar value
2417  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2418  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2419  {
2420  using boost::numeric_cast;
2421 
2425 
      // The BLAS interface takes 'int' dimensions; A.spacing() is passed as leading dimension.
2426  const int M ( numeric_cast<int>( A.rows() ) );
2427  const int N ( numeric_cast<int>( A.columns() ) );
2428  const int lda( numeric_cast<int>( A.spacing() ) );
2429 
2430  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2431  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2432  }
2433 #endif
2434  //**********************************************************************************************
2435 
2436  //**BLAS-based assignment to dense vectors (double precision)***********************************
2437 #if BLAZE_BLAS_MODE
2438 
// BLAS kernel (double precision) for the assignment y = ( x * A ) * scalar.
// Calls cblas_dgemv on the row-major matrix A with CblasTrans, alpha = scalar and beta = 0,
// so y is overwritten with the scaled product.
// NOTE(review): listing lines 2460-2462 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2451  template< typename VT1 // Type of the left-hand side target vector
2452  , typename VT2 // Type of the left-hand side vector operand
2453  , typename MT1 // Type of the right-hand side matrix operand
2454  , typename ST2 > // Type of the scalar value
2455  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2456  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2457  {
2458  using boost::numeric_cast;
2459 
2463 
      // The BLAS interface takes 'int' dimensions; A.spacing() is passed as leading dimension.
2464  const int M ( numeric_cast<int>( A.rows() ) );
2465  const int N ( numeric_cast<int>( A.columns() ) );
2466  const int lda( numeric_cast<int>( A.spacing() ) );
2467 
2468  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2469  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2470  }
2471 #endif
2472  //**********************************************************************************************
2473 
2474  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2475 #if BLAZE_BLAS_MODE
2476 
// BLAS kernel (single precision complex) for the assignment y = ( x * A ) * scalar.
// Calls cblas_cgemv on the row-major matrix A with CblasTrans; alpha is the scalar converted
// to complex<float> and beta is (0,0), so y is overwritten with the scaled product.
// NOTE(review): listing lines 2498-2500 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2489  template< typename VT1 // Type of the left-hand side target vector
2490  , typename VT2 // Type of the left-hand side vector operand
2491  , typename MT1 // Type of the right-hand side matrix operand
2492  , typename ST2 > // Type of the scalar value
2493  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2494  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2495  {
2496  using boost::numeric_cast;
2497 
      // The underlying value type of all three element types must be 'float'.
2501  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2502  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2503  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2504 
2505  const int M ( numeric_cast<int>( A.rows() ) );
2506  const int N ( numeric_cast<int>( A.columns() ) );
2507  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex BLAS routines take alpha and beta by address.
2508  const complex<float> alpha( scalar );
2509  const complex<float> beta ( 0.0F, 0.0F );
2510 
2511  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2512  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2513  }
2514 #endif
2515  //**********************************************************************************************
2516 
2517  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2518 #if BLAZE_BLAS_MODE
2519 
// BLAS kernel (double precision complex) for the assignment y = ( x * A ) * scalar.
// Calls cblas_zgemv on the row-major matrix A with CblasTrans; alpha is the scalar converted
// to complex<double> and beta is (0,0), so y is overwritten with the scaled product.
// NOTE(review): listing lines 2541-2543 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2532  template< typename VT1 // Type of the left-hand side target vector
2533  , typename VT2 // Type of the left-hand side vector operand
2534  , typename MT1 // Type of the right-hand side matrix operand
2535  , typename ST2 > // Type of the scalar value
2536  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2537  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2538  {
2539  using boost::numeric_cast;
2540 
      // The underlying value type of all three element types must be 'double'.
2544  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2545  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2546  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2547 
2548  const int M ( numeric_cast<int>( A.rows() ) );
2549  const int N ( numeric_cast<int>( A.columns() ) );
2550  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex BLAS routines take alpha and beta by address.
2551  const complex<double> alpha( scalar );
2552  const complex<double> beta ( 0.0, 0.0 );
2553 
2554  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2555  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2556  }
2557 #endif
2558  //**********************************************************************************************
2559 
2560  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector/matrix product 'rhs' to the sparse target vector 'lhs'.
// The expression is first evaluated into a temporary of type ResultType (via serial()), and
// the temporary is then assigned to the target.
// NOTE(review): listing lines 2575 and 2577-2579 are absent from this extract; presumably
// statements were dropped by the extraction - confirm against the original header.
2572  template< typename VT1 > // Type of the target sparse vector
2573  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2574  {
2576 
2580 
2581  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2582 
2583  const ResultType tmp( serial( rhs ) );
2584  assign( ~lhs, tmp );
2585  }
2586  //**********************************************************************************************
2587 
2588  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector/matrix product 'rhs' to the dense target 'lhs'.
// The two operands of the wrapped multiplication are extracted, evaluated through serial(),
// and handed to selectAddAssignKernel() together with the scalar.
// NOTE(review): listing line 2603 is absent from this extract (numbering jumps 2602 -> 2604);
// presumably a statement was dropped by the extraction - confirm against the original header.
2600  template< typename VT1 > // Type of the target dense vector
2601  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2602  {
2604 
2605  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2606 
2607  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2608  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2609 
      // An empty matrix contributes nothing, so the target is left untouched.
2610  if( right.rows() == 0UL || right.columns() == 0UL ) {
2611  return;
2612  }
2613 
2614  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
2615  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2616 
2617  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2618  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2619  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2620  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2621 
2622  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2623  }
2624  //**********************************************************************************************
2625 
2626  //**Addition assignment to dense vectors (kernel selection)*************************************
2637  template< typename VT1 // Type of the left-hand side target vector
2638  , typename VT2 // Type of the left-hand side vector operand
2639  , typename MT1 // Type of the right-hand side matrix operand
2640  , typename ST2 > // Type of the scalar value
2641  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2642  {
2643  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2644  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2645  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2646  else
2647  DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
2648  }
2649  //**********************************************************************************************
2650 
2651  //**Default addition assignment to dense vectors************************************************
2665  template< typename VT1 // Type of the left-hand side target vector
2666  , typename VT2 // Type of the left-hand side vector operand
2667  , typename MT1 // Type of the right-hand side matrix operand
2668  , typename ST2 > // Type of the scalar value
2669  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2670  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2671  {
2672  y.addAssign( x * A * scalar );
2673  }
2674  //**********************************************************************************************
2675 
2676  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment y += ( x * A ) * scalar.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the products x[i] * A(i,j..) are accumulated over all
// rows i, multiplied by the broadcast scalar ('factor'), added to the current content of y
// and stored back.
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed before being summed
// into - presumably IntrinsicType's default constructor zero-initializes; confirm.
// NOTE(review): the last block always loads/stores a full intrinsic vector even if fewer than
// IT::size columns remain - presumably the storage of A and y is padded; confirm.
2690  template< typename VT1 // Type of the left-hand side target vector
2691  , typename VT2 // Type of the left-hand side vector operand
2692  , typename MT1 // Type of the right-hand side matrix operand
2693  , typename ST2 > // Type of the scalar value
2694  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2695  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2696  {
2697  typedef IntrinsicTrait<ElementType> IT;
2698 
2699  const size_t M( A.rows() );
2700  const size_t N( A.columns() );
2701 
2702  const IntrinsicType factor( set( scalar ) );
2703 
2704  size_t j( 0UL );
2705 
      // Blocks of eight intrinsic vectors
2706  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2707  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2708  for( size_t i=0UL; i<M; ++i ) {
2709  const IntrinsicType x1( set( x[i] ) );
2710  xmm1 = xmm1 + x1 * A.load(i,j );
2711  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2712  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2713  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2714  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2715  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2716  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2717  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2718  }
2719  y.store( j , y.load(j ) + xmm1*factor );
2720  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2721  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2722  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2723  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
2724  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
2725  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
2726  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
2727  }
      // Blocks of four intrinsic vectors
2728  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2729  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2730  for( size_t i=0UL; i<M; ++i ) {
2731  const IntrinsicType x1( set( x[i] ) );
2732  xmm1 = xmm1 + x1 * A.load(i,j );
2733  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2734  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2735  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2736  }
2737  y.store( j , y.load(j ) + xmm1*factor );
2738  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2739  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2740  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2741  }
      // Blocks of three intrinsic vectors
2742  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2743  IntrinsicType xmm1, xmm2, xmm3;
2744  for( size_t i=0UL; i<M; ++i ) {
2745  const IntrinsicType x1( set( x[i] ) );
2746  xmm1 = xmm1 + x1 * A.load(i,j );
2747  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2748  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2749  }
2750  y.store( j , y.load(j ) + xmm1*factor );
2751  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2752  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2753  }
      // Blocks of two intrinsic vectors
2754  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2755  IntrinsicType xmm1, xmm2;
2756  for( size_t i=0UL; i<M; ++i ) {
2757  const IntrinsicType x1( set( x[i] ) );
2758  xmm1 = xmm1 + x1 * A.load(i,j );
2759  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2760  }
2761  y.store( j , y.load(j ) + xmm1*factor );
2762  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
2763  }
      // Remaining columns (at most one intrinsic vector)
2764  if( j < N ) {
2765  IntrinsicType xmm1;
2766  for( size_t i=0UL; i<M; ++i ) {
2767  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2768  }
2769  y.store( j, y.load(j) + xmm1*factor );
2770  }
2771  }
2772  //**********************************************************************************************
2773 
2774  //**BLAS-based addition assignment to dense vectors (default)***********************************
2788  template< typename VT1 // Type of the left-hand side target vector
2789  , typename VT2 // Type of the left-hand side vector operand
2790  , typename MT1 // Type of the right-hand side matrix operand
2791  , typename ST2 > // Type of the scalar value
2792  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2793  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2794  {
2795  selectDefaultAddAssignKernel( y, x, A, scalar );
2796  }
2797  //**********************************************************************************************
2798 
2799  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2800 #if BLAZE_BLAS_MODE
2801 
// BLAS kernel (single precision) for the addition assignment y += ( x * A ) * scalar.
// Calls cblas_sgemv on the row-major matrix A with CblasTrans, alpha = scalar and beta = 1,
// so the scaled product is added to the existing content of y.
// NOTE(review): listing lines 2823-2825 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2814  template< typename VT1 // Type of the left-hand side target vector
2815  , typename VT2 // Type of the left-hand side vector operand
2816  , typename MT1 // Type of the right-hand side matrix operand
2817  , typename ST2 > // Type of the scalar value
2818  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2819  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2820  {
2821  using boost::numeric_cast;
2822 
2826 
      // The BLAS interface takes 'int' dimensions; A.spacing() is passed as leading dimension.
2827  const int M ( numeric_cast<int>( A.rows() ) );
2828  const int N ( numeric_cast<int>( A.columns() ) );
2829  const int lda( numeric_cast<int>( A.spacing() ) );
2830 
2831  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2832  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2833  }
2834 #endif
2835  //**********************************************************************************************
2836 
2837  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2838 #if BLAZE_BLAS_MODE
2839 
// BLAS kernel (double precision) for the addition assignment y += ( x * A ) * scalar.
// Calls cblas_dgemv on the row-major matrix A with CblasTrans, alpha = scalar and beta = 1,
// so the scaled product is added to the existing content of y.
// NOTE(review): listing lines 2861-2863 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2852  template< typename VT1 // Type of the left-hand side target vector
2853  , typename VT2 // Type of the left-hand side vector operand
2854  , typename MT1 // Type of the right-hand side matrix operand
2855  , typename ST2 > // Type of the scalar value
2856  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2857  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2858  {
2859  using boost::numeric_cast;
2860 
2864 
      // The BLAS interface takes 'int' dimensions; A.spacing() is passed as leading dimension.
2865  const int M ( numeric_cast<int>( A.rows() ) );
2866  const int N ( numeric_cast<int>( A.columns() ) );
2867  const int lda( numeric_cast<int>( A.spacing() ) );
2868 
2869  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2870  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2871  }
2872 #endif
2873  //**********************************************************************************************
2874 
2875  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2876 #if BLAZE_BLAS_MODE
2877 
// BLAS kernel (single precision complex) for the addition assignment y += ( x * A ) * scalar.
// Calls cblas_cgemv on the row-major matrix A with CblasTrans; alpha is the scalar converted
// to complex<float> and beta is (1,0), so the scaled product is added to the content of y.
// NOTE(review): listing lines 2899-2901 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2890  template< typename VT1 // Type of the left-hand side target vector
2891  , typename VT2 // Type of the left-hand side vector operand
2892  , typename MT1 // Type of the right-hand side matrix operand
2893  , typename ST2 > // Type of the scalar value
2894  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2895  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2896  {
2897  using boost::numeric_cast;
2898 
      // The underlying value type of all three element types must be 'float'.
2902  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2903  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2904  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2905 
2906  const int M ( numeric_cast<int>( A.rows() ) );
2907  const int N ( numeric_cast<int>( A.columns() ) );
2908  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex BLAS routines take alpha and beta by address.
2909  const complex<float> alpha( scalar );
2910  const complex<float> beta ( 1.0F, 0.0F );
2911 
2912  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2913  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2914  }
2915 #endif
2916  //**********************************************************************************************
2917 
2918  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2919 #if BLAZE_BLAS_MODE
2920 
// BLAS kernel (double precision complex) for the addition assignment y += ( x * A ) * scalar.
// Calls cblas_zgemv on the row-major matrix A with CblasTrans; alpha is the scalar converted
// to complex<double> and beta is (1,0), so the scaled product is added to the content of y.
// NOTE(review): listing lines 2942-2944 are absent from this extract; presumably statements
// were dropped by the extraction - confirm against the original header.
2933  template< typename VT1 // Type of the left-hand side target vector
2934  , typename VT2 // Type of the left-hand side vector operand
2935  , typename MT1 // Type of the right-hand side matrix operand
2936  , typename ST2 > // Type of the scalar value
2937  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2938  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2939  {
2940  using boost::numeric_cast;
2941 
      // The underlying value type of all three element types must be 'double'.
2945  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2946  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2947  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2948 
2949  const int M ( numeric_cast<int>( A.rows() ) );
2950  const int N ( numeric_cast<int>( A.columns() ) );
2951  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex BLAS routines take alpha and beta by address.
2952  const complex<double> alpha( scalar );
2953  const complex<double> beta ( 1.0, 0.0 );
2954 
2955  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2956  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2957  }
2958 #endif
2959  //**********************************************************************************************
2960 
2961  //**Addition assignment to sparse vectors*******************************************************
2962  // No special implementation for the addition assignment to sparse vectors.
2963  //**********************************************************************************************
2964 
2965  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled vector/matrix product 'rhs' to the dense target 'lhs'.
// The two operands of the wrapped multiplication are extracted, evaluated through serial(),
// and handed to selectSubAssignKernel() together with the scalar.
// NOTE(review): listing line 2980 is absent from this extract (numbering jumps 2979 -> 2981);
// presumably a statement was dropped by the extraction - confirm against the original header.
2977  template< typename VT1 > // Type of the target dense vector
2978  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2979  {
2981 
2982  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2983 
2984  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2985  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2986 
      // An empty matrix contributes nothing, so the target is left untouched.
2987  if( right.rows() == 0UL || right.columns() == 0UL ) {
2988  return;
2989  }
2990 
2991  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
2992  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2993 
2994  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2995  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2996  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2997  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2998 
2999  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
3000  }
3001  //**********************************************************************************************
3002 
3003  //**Subtraction assignment to dense vectors (kernel selection)**********************************
3014  template< typename VT1 // Type of the left-hand side target vector
3015  , typename VT2 // Type of the left-hand side vector operand
3016  , typename MT1 // Type of the right-hand side matrix operand
3017  , typename ST2 > // Type of the scalar value
3018  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3019  {
3020  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3021  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3022  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
3023  else
3024  DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
3025  }
3026  //**********************************************************************************************
3027 
3028  //**Default subtraction assignment to dense vectors*********************************************
3042  template< typename VT1 // Type of the left-hand side target vector
3043  , typename VT2 // Type of the left-hand side vector operand
3044  , typename MT1 // Type of the right-hand side matrix operand
3045  , typename ST2 > // Type of the scalar value
3046  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3047  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3048  {
3049  y.subAssign( x * A * scalar );
3050  }
3051  //**********************************************************************************************
3052 
3053  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment y -= ( x * A ) * scalar.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the products x[i] * A(i,j..) are accumulated over all
// rows i, multiplied by the broadcast scalar ('factor'), subtracted from the current content
// of y and stored back.
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed before being summed
// into - presumably IntrinsicType's default constructor zero-initializes; confirm.
// NOTE(review): the last block always loads/stores a full intrinsic vector even if fewer than
// IT::size columns remain - presumably the storage of A and y is padded; confirm.
3067  template< typename VT1 // Type of the left-hand side target vector
3068  , typename VT2 // Type of the left-hand side vector operand
3069  , typename MT1 // Type of the right-hand side matrix operand
3070  , typename ST2 > // Type of the scalar value
3071  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3072  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3073  {
3074  typedef IntrinsicTrait<ElementType> IT;
3075 
3076  const size_t M( A.rows() );
3077  const size_t N( A.columns() );
3078 
3079  const IntrinsicType factor( set( scalar ) );
3080 
3081  size_t j( 0UL );
3082 
      // Blocks of eight intrinsic vectors
3083  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3084  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3085  for( size_t i=0UL; i<M; ++i ) {
3086  const IntrinsicType x1( set( x[i] ) );
3087  xmm1 = xmm1 + x1 * A.load(i,j );
3088  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3089  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3090  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3091  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3092  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3093  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3094  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3095  }
3096  y.store( j , y.load(j ) - xmm1*factor );
3097  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3098  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3099  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3100  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
3101  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
3102  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
3103  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
3104  }
      // Blocks of four intrinsic vectors
3105  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3106  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3107  for( size_t i=0UL; i<M; ++i ) {
3108  const IntrinsicType x1( set( x[i] ) );
3109  xmm1 = xmm1 + x1 * A.load(i,j );
3110  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3111  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3112  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3113  }
3114  y.store( j , y.load(j ) - xmm1*factor );
3115  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3116  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3117  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3118  }
      // Blocks of three intrinsic vectors
3119  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
3120  IntrinsicType xmm1, xmm2, xmm3;
3121  for( size_t i=0UL; i<M; ++i ) {
3122  const IntrinsicType x1( set( x[i] ) );
3123  xmm1 = xmm1 + x1 * A.load(i,j );
3124  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3125  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3126  }
3127  y.store( j , y.load(j ) - xmm1*factor );
3128  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3129  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3130  }
      // Blocks of two intrinsic vectors
3131  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3132  IntrinsicType xmm1, xmm2;
3133  for( size_t i=0UL; i<M; ++i ) {
3134  const IntrinsicType x1( set( x[i] ) );
3135  xmm1 = xmm1 + x1 * A.load(i,j );
3136  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3137  }
3138  y.store( j , y.load(j ) - xmm1*factor );
3139  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
3140  }
      // Remaining columns (at most one intrinsic vector)
3141  if( j < N ) {
3142  IntrinsicType xmm1;
3143  for( size_t i=0UL; i<M; ++i ) {
3144  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3145  }
3146  y.store( j, y.load(j) - xmm1*factor );
3147  }
3148  }
3149  //**********************************************************************************************
3150 
3151  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Default overload of the BLAS subtraction assignment kernel.
      //
      // Enabled via EnableIf when UseDefaultKernel<VT1,VT2,MT1,ST2> holds. It issues no BLAS call
      // itself; all four arguments (target y, vector x, matrix A, scalar) are forwarded unchanged
      // to selectDefaultSubAssignKernel().
3165  template< typename VT1 // Type of the left-hand side target vector
3166  , typename VT2 // Type of the left-hand side vector operand
3167  , typename MT1 // Type of the right-hand side matrix operand
3168  , typename ST2 > // Type of the scalar value
3169  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3170  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3171  {
3172  selectDefaultSubAssignKernel( y, x, A, scalar );
3173  }
3174  //**********************************************************************************************
3175 
3176  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
      // Single precision BLAS overload of the subtraction assignment kernel: y -= ( x * A ) * scalar.
      //
      // Only compiled when BLAZE_BLAS_MODE is non-zero and only selected (EnableIf) when
      // UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds. The work is delegated to cblas_sgemv(),
      // operating directly on the raw arrays returned by A.data(), x.data() and y.data().
      //
      // y      : target vector, read and overwritten by the BLAS call.
      // x      : left-hand side vector operand.
      // A      : right-hand side matrix operand, passed as row-major with leading dimension lda.
      // scalar : scaling factor; negated and passed as the gemv 'alpha' argument.
3177 #if BLAZE_BLAS_MODE
3178 
3191  template< typename VT1 // Type of the left-hand side target vector
3192  , typename VT2 // Type of the left-hand side vector operand
3193  , typename MT1 // Type of the right-hand side matrix operand
3194  , typename ST2 > // Type of the scalar value
3195  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3196  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3197  {
3198  using boost::numeric_cast;
3199 
3203  // The CBLAS interface takes int dimensions; the size values are converted via numeric_cast<int>.
3204  const int M ( numeric_cast<int>( A.rows() ) );
3205  const int N ( numeric_cast<int>( A.columns() ) );
3206  const int lda( numeric_cast<int>( A.spacing() ) ); // NOTE(review): spacing() presumably is the (padded) row length - confirm
3207  // gemv computes y := alpha*op(A)*x + beta*y; with CblasTrans, alpha = -scalar, beta = 1: y := y - scalar * A^T * x
3208  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3209  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3210  }
3211 #endif
3212  //**********************************************************************************************
3213 
3214  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
      // Double precision BLAS overload of the subtraction assignment kernel: y -= ( x * A ) * scalar.
      //
      // Only compiled when BLAZE_BLAS_MODE is non-zero and only selected (EnableIf) when
      // UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds. The work is delegated to cblas_dgemv(),
      // operating directly on the raw arrays returned by A.data(), x.data() and y.data().
      //
      // y      : target vector, read and overwritten by the BLAS call.
      // x      : left-hand side vector operand.
      // A      : right-hand side matrix operand, passed as row-major with leading dimension lda.
      // scalar : scaling factor; negated and passed as the gemv 'alpha' argument.
3215 #if BLAZE_BLAS_MODE
3216 
3229  template< typename VT1 // Type of the left-hand side target vector
3230  , typename VT2 // Type of the left-hand side vector operand
3231  , typename MT1 // Type of the right-hand side matrix operand
3232  , typename ST2 > // Type of the scalar value
3233  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3234  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3235  {
3236  using boost::numeric_cast;
3237 
3241  // The CBLAS interface takes int dimensions; the size values are converted via numeric_cast<int>.
3242  const int M ( numeric_cast<int>( A.rows() ) );
3243  const int N ( numeric_cast<int>( A.columns() ) );
3244  const int lda( numeric_cast<int>( A.spacing() ) ); // NOTE(review): spacing() presumably is the (padded) row length - confirm
3245  // gemv computes y := alpha*op(A)*x + beta*y; with CblasTrans, alpha = -scalar, beta = 1: y := y - scalar * A^T * x
3246  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3247  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3248  }
3249 #endif
3250  //**********************************************************************************************
3251 
3252  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
      // Single precision complex BLAS overload of the subtraction assignment kernel:
      // y -= ( x * A ) * scalar, delegated to cblas_cgemv().
      //
      // Only compiled when BLAZE_BLAS_MODE is non-zero and only selected (EnableIf) when
      // UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds (the scalar type ST2 is not part of
      // the selection condition). The three constraints below require at compile time that the
      // value_type of each element type is float.
      //
      // y      : target vector, read and overwritten by the BLAS call.
      // x      : left-hand side vector operand.
      // A      : right-hand side matrix operand, passed as row-major with leading dimension lda.
      // scalar : scaling factor; negated and converted to complex<float> for the 'alpha' argument.
3253 #if BLAZE_BLAS_MODE
3254 
3267  template< typename VT1 // Type of the left-hand side target vector
3268  , typename VT2 // Type of the left-hand side vector operand
3269  , typename MT1 // Type of the right-hand side matrix operand
3270  , typename ST2 > // Type of the scalar value
3271  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3272  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3273  {
3274  using boost::numeric_cast;
3275 
3279  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3280  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3281  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3282 
3283  const int M ( numeric_cast<int>( A.rows() ) );
3284  const int N ( numeric_cast<int>( A.columns() ) );
3285  const int lda( numeric_cast<int>( A.spacing() ) ); // NOTE(review): spacing() presumably is the (padded) row length - confirm
3286  const complex<float> alpha( -scalar ); // negated factor turns the gemv update into a subtraction
3287  const complex<float> beta ( 1.0F, 0.0F ); // beta = 1 keeps the current content of y
3288  // The complex gemv variant receives alpha and beta by address; y := alpha*A^T*x + beta*y
3289  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3290  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3291  }
3292 #endif
3293  //**********************************************************************************************
3294 
3295  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
      // Double precision complex BLAS overload of the subtraction assignment kernel:
      // y -= ( x * A ) * scalar, delegated to cblas_zgemv().
      //
      // Only compiled when BLAZE_BLAS_MODE is non-zero and only selected (EnableIf) when
      // UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds (the scalar type ST2 is not part of
      // the selection condition). The three constraints below require at compile time that the
      // value_type of each element type is double.
      //
      // y      : target vector, read and overwritten by the BLAS call.
      // x      : left-hand side vector operand.
      // A      : right-hand side matrix operand, passed as row-major with leading dimension lda.
      // scalar : scaling factor; negated and converted to complex<double> for the 'alpha' argument.
3296 #if BLAZE_BLAS_MODE
3297 
3310  template< typename VT1 // Type of the left-hand side target vector
3311  , typename VT2 // Type of the left-hand side vector operand
3312  , typename MT1 // Type of the right-hand side matrix operand
3313  , typename ST2 > // Type of the scalar value
3314  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3315  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3316  {
3317  using boost::numeric_cast;
3318 
3322  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3323  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3324  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3325 
3326  const int M ( numeric_cast<int>( A.rows() ) );
3327  const int N ( numeric_cast<int>( A.columns() ) );
3328  const int lda( numeric_cast<int>( A.spacing() ) ); // NOTE(review): spacing() presumably is the (padded) row length - confirm
3329  const complex<double> alpha( -scalar ); // negated factor turns the gemv update into a subtraction
3330  const complex<double> beta ( 1.0, 0.0 ); // beta = 1 keeps the current content of y
3331  // The complex gemv variant receives alpha and beta by address; y := alpha*A^T*x + beta*y
3332  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3333  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3334  }
3335 #endif
3336  //**********************************************************************************************
3337 
3338  //**Subtraction assignment to sparse vectors****************************************************
3339  // No special implementation for the subtraction assignment to sparse vectors.
3340  //**********************************************************************************************
3341 
3342  //**Multiplication assignment to dense vectors**************************************************
      // Multiplication assignment of the scaled expression 'rhs' to the dense vector 'lhs'.
      //
      // The expression is not multiplied into the target directly: it is first evaluated into a
      // temporary of type ResultType and that temporary is then passed to multAssign() together
      // with the unwrapped target (~lhs).
      //
      // lhs : target dense vector.
      // rhs : scaled multiplication expression to be multiplied into lhs.
3354  template< typename VT1 > // Type of the target dense vector
3355  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3356  {
3358 
3362 
3363  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3364 
3365  const ResultType tmp( serial( rhs ) ); // serial() forces a serial evaluation of the expression
3366  multAssign( ~lhs, tmp );
3367  }
3368  //**********************************************************************************************
3369 
3370  //**Multiplication assignment to sparse vectors*************************************************
3371  // No special implementation for the multiplication assignment to sparse vectors.
3372  //**********************************************************************************************
3373 
3374  //**SMP assignment to dense vectors*************************************************************
      // SMP assignment of the scaled vector/matrix product 'rhs' to the dense vector 'lhs'.
      //
      // Enabled via EnableIf when UseSMPAssign<VT1> holds. The two operands of the wrapped product
      // (rhs.vector_) are evaluated into x and A first; the assignment is then re-issued as
      // smpAssign( ~lhs, x * A * rhs.scalar_ ) on the evaluated operands.
      //
      // lhs : target dense vector.
      // rhs : scaled multiplication expression to be assigned.
3388  template< typename VT1 > // Type of the target dense vector
3389  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3390  smpAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3391  {
3393 
3394  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3395 
3396  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3397  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3398 
3399  if( right.rows() == 0UL ) { // no rows contribute to the product: the target is reset instead
3400  reset( ~lhs );
3401  return;
3402  }
3403  else if( right.columns() == 0UL ) { // zero columns: nothing is assigned
3404  return;
3405  }
3406 
3407  LT x( left ); // Evaluation of the left-hand side dense vector operand
3408  RT A( right ); // Evaluation of the right-hand side dense matrix operand
3409 
3410  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3411  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3412  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3413  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3414 
3415  smpAssign( ~lhs, x * A * rhs.scalar_ );
3416  }
3417  //**********************************************************************************************
3418 
3419  //**SMP assignment to sparse vectors************************************************************
      // SMP assignment of the scaled expression 'rhs' to the sparse vector 'lhs'.
      //
      // Enabled via EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into a
      // temporary of type ResultType, which is then passed to smpAssign() together with the
      // unwrapped target (~lhs).
      //
      // lhs : target sparse vector.
      // rhs : scaled multiplication expression to be assigned.
3433  template< typename VT1 > // Type of the target sparse vector
3434  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3435  smpAssign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3436  {
3438 
3442 
3443  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3444 
3445  const ResultType tmp( rhs ); // evaluate the expression once into a temporary
3446  smpAssign( ~lhs, tmp );
3447  }
3448  //**********************************************************************************************
3449 
3450  //**SMP addition assignment to dense vectors****************************************************
      // SMP addition assignment of the scaled vector/matrix product 'rhs' to the dense vector 'lhs'.
      //
      // Enabled via EnableIf when UseSMPAssign<VT1> holds. The two operands of the wrapped product
      // (rhs.vector_) are evaluated into x and A first; the operation is then re-issued as
      // smpAddAssign( ~lhs, x * A * rhs.scalar_ ) on the evaluated operands.
      //
      // lhs : target dense vector.
      // rhs : scaled multiplication expression to be added.
3464  template< typename VT1 > // Type of the target dense vector
3465  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3466  smpAddAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3467  {
3469 
3470  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3471 
3472  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3473  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3474 
3475  if( right.rows() == 0UL || right.columns() == 0UL ) { // empty matrix operand: the target is left untouched
3476  return;
3477  }
3478 
3479  LT x( left ); // Evaluation of the left-hand side dense vector operand
3480  RT A( right ); // Evaluation of the right-hand side dense matrix operand
3481 
3482  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3483  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3484  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3485  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3486 
3487  smpAddAssign( ~lhs, x * A * rhs.scalar_ );
3488  }
3489  //**********************************************************************************************
3490 
3491  //**SMP addition assignment to sparse vectors***************************************************
3492  // No special implementation for the SMP addition assignment to sparse vectors.
3493  //**********************************************************************************************
3494 
3495  //**SMP subtraction assignment to dense vectors*************************************************
      // SMP subtraction assignment of the scaled vector/matrix product 'rhs' to the dense vector
      // 'lhs'.
      //
      // Enabled via EnableIf when UseSMPAssign<VT1> holds. The two operands of the wrapped product
      // (rhs.vector_) are evaluated into x and A first; the operation is then re-issued as
      // smpSubAssign( ~lhs, x * A * rhs.scalar_ ) on the evaluated operands.
      //
      // lhs : target dense vector.
      // rhs : scaled multiplication expression to be subtracted.
3509  template< typename VT1 > // Type of the target dense vector
3510  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3511  smpSubAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3512  {
3514 
3515  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3516 
3517  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3518  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3519 
3520  if( right.rows() == 0UL || right.columns() == 0UL ) { // empty matrix operand: the target is left untouched
3521  return;
3522  }
3523 
3524  LT x( left ); // Evaluation of the left-hand side dense vector operand
3525  RT A( right ); // Evaluation of the right-hand side dense matrix operand
3526 
3527  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3528  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3529  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3530  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3531 
3532  smpSubAssign( ~lhs, x * A * rhs.scalar_ );
3533  }
3534  //**********************************************************************************************
3535 
3536  //**SMP subtraction assignment to sparse vectors************************************************
3537  // No special implementation for the SMP subtraction assignment to sparse vectors.
3538  //**********************************************************************************************
3539 
3540  //**SMP multiplication assignment to dense vectors**********************************************
      // SMP multiplication assignment of the scaled expression 'rhs' to the dense vector 'lhs'.
      //
      // Enabled via EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into a
      // temporary of type ResultType, which is then passed to smpMultAssign() together with the
      // unwrapped target (~lhs).
      //
      // lhs : target dense vector.
      // rhs : scaled multiplication expression to be multiplied into lhs.
3555  template< typename VT1 > // Type of the target dense vector
3556  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3557  smpMultAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3558  {
3560 
3564 
3565  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3566 
3567  const ResultType tmp( rhs ); // evaluate the expression once into a temporary
3568  smpMultAssign( ~lhs, tmp );
3569  }
3570  //**********************************************************************************************
3571 
3572  //**SMP multiplication assignment to sparse vectors*********************************************
3573  // No special implementation for the SMP multiplication assignment to sparse vectors.
3574  //**********************************************************************************************
3575 
3576  //**Compile time checks*************************************************************************
3585  //**********************************************************************************************
3586 };
3588 //*************************************************************************************************
3589 
3590 
3591 
3592 
3593 //=================================================================================================
3594 //
3595 // GLOBAL BINARY ARITHMETIC OPERATORS
3596 //
3597 //=================================================================================================
3598 
3599 //*************************************************************************************************
     // Multiplication operator for a transpose dense vector and a row-major dense matrix.
     //
     // vec : left-hand side transpose dense vector.
     // mat : right-hand side row-major dense matrix.
     //
     // Returns a TDVecDMatMultExpr<T1,T2> expression object built from the two unwrapped
     // operands. Throws std::invalid_argument if the size of the vector differs from the number
     // of rows of the matrix. The overload is removed from overload resolution (DisableIf) when
     // T2 is a matrix/matrix multiplication expression (IsMatMatMultExpr<T2>).
     //
     // NOTE(review): the declarator (listing line 3633) was missing from this listing and has
     // been reconstructed from the parameter names used in the body, the template parameter
     // comments and the DenseVector<...,true> base of TDVecDMatMultExpr - confirm against the
     // original header.
3630 template< typename T1 // Type of the left-hand side dense vector
3631  , typename T2 > // Type of the right-hand side dense matrix
3632 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
3633  operator*( const DenseVector<T1,true>& vec, const DenseMatrix<T2,false>& mat )
3634 {
3636 
3637  if( (~vec).size() != (~mat).rows() ) // operand dimensions must agree before an expression is created
3638  throw std::invalid_argument( "Vector and matrix sizes do not match" );
3639 
3640  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
3641 }
3642 //*************************************************************************************************
3643 
3644 
3645 
3646 
3647 //=================================================================================================
3648 //
3649 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3650 //
3651 //=================================================================================================
3652 
3653 //*************************************************************************************************
     // Restructuring multiplication operator for a transpose dense vector and a dense
     // matrix/matrix multiplication expression.
     //
     // vec : left-hand side transpose dense vector.
     // mat : right-hand side dense matrix/matrix multiplication expression.
     //
     // Selected (EnableIf) only when T2 is a matrix/matrix multiplication expression
     // (IsMatMatMultExpr<T2>). The product vec * ( A * B ) is rewritten as ( vec * A ) * B, so the
     // vector is multiplied with the left operand of the matrix product first and the result
     // with the right operand. The return type is given by MultExprTrait<T1,T2>.
     //
     // NOTE(review): the declarator (listing line 3670) was missing from this listing and has
     // been reconstructed from the parameter names used in the body and the template parameters
     // (T1, T2, SO) - confirm against the original header.
3666 template< typename T1 // Type of the left-hand side dense vector
3667  , typename T2 // Type of the right-hand side dense matrix
3668  , bool SO > // Storage order of the right-hand side dense matrix
3669 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3670  operator*( const DenseVector<T1,true>& vec, const DenseMatrix<T2,SO>& mat )
3671 {
3673 
3674  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
3675 }
3676 //*************************************************************************************************
3677 
3678 
3679 
3680 
3681 //=================================================================================================
3682 //
3683 // EXPRESSION TRAIT SPECIALIZATIONS
3684 //
3685 //=================================================================================================
3686 
3687 //*************************************************************************************************
     // Specialization of SubvectorExprTrait for TDVecDMatMultExpr<VT,MT>.
     //
     // The nested 'Type' is the MultExprTrait result of the unchanged vector type VT and the
     // submatrix expression type of the (const) matrix operand, i.e. the type of a subvector of
     // the product is expressed as the vector multiplied with a submatrix of MT.
     // NOTE(review): AF is forwarded unchanged to SubmatrixExprTrait; presumably the alignment
     // flag of the subvector - confirm.
3689 template< typename VT, typename MT, bool AF >
3690 struct SubvectorExprTrait< TDVecDMatMultExpr<VT,MT>, AF >
3691 {
3692  public:
3693  //**********************************************************************************************
3694  typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
3695  //**********************************************************************************************
3696 };
3698 //*************************************************************************************************
3699 
3700 } // namespace blaze
3701 
3702 #endif
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:377
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:111
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B \cdot C$ ).
Definition: DMatDMatMultExpr.h:4329
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:247
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:115
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:387
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:343
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
Header file for the DenseVector base class.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:296
Header file for the VecScalarMultExpr base class.
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:257
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:242
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:251
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:253
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:244
Header file for the multiplication trait.
Header file for the IsDouble type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:246
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:355
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:281
Header file for the IsMatMatMultExpr type trait class.
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold.This threshold specifies when a dens...
Definition: Thresholds.h:368
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Constraint on the data type.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:367
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:323
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:110
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:112
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:254
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:333
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:113
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
Header file for the TVecMatMultExpr base class.
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:398
Expression object for transpose dense vector-dense matrix multiplications.The TDVecDMatMultExpr class...
Definition: Forward.h:133
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:245
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:260
Header file for all intrinsic functionality.
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecDMatMultExpr.h:399
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:114
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:248
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:397
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:154
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:243
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.