All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
67 #include <blaze/system/BLAS.h>
69 #include <blaze/util/Assert.h>
70 #include <blaze/util/Complex.h>
76 #include <blaze/util/DisableIf.h>
77 #include <blaze/util/EnableIf.h>
79 #include <blaze/util/SelectType.h>
80 #include <blaze/util/Types.h>
86 
87 
88 namespace blaze {
89 
90 //=================================================================================================
91 //
92 // CLASS TDVECDMATMULTEXPR
93 //
94 //=================================================================================================
95 
96 //*************************************************************************************************
103 template< typename VT // Type of the left-hand side dense vector
104  , typename MT > // Type of the right-hand side dense matrix
105 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
106  , private TVecMatMultExpr
107  , private Computation
108 {
109  private:
110  //**Type definitions****************************************************************************
// Private shorthand aliases for the result, element and composite types of the two operands.
111  typedef typename VT::ResultType VRT;  // Result type of the left-hand side vector operand
112  typedef typename MT::ResultType MRT;  // Result type of the right-hand side matrix operand
113  typedef typename VRT::ElementType VET;  // Element type of the vector result type
114  typedef typename MRT::ElementType MET;  // Element type of the matrix result type
115  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand
116  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
// Compile-time flag: nonzero when the vector operand is itself a computation or reports that
// it requires evaluation. It feeds UseSMPAssignKernel and smpAssignable further down.
121  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
// Compile-time flag for the matrix operand, the counterpart of evaluateVector.
// NOTE(review): only the first line of the initializer is present in this listing (the
// parenthesis opened here is never closed and the expression ends in '&&'). The missing
// continuation has to be restored from the original header before this can compile.
126  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
132 
// Dispatch trait for the selectAssignKernel/selectAddAssignKernel overloads: value is nonzero
// when at least one operand has to be evaluated first (evaluateVector or evaluateMatrix).
// The template parameters do not take part in the computation; presumably they only make the
// trait dependent so it can be used with EnableIf/DisableIf - confirm.
135  template< typename T1, typename T2, typename T3 >
136  struct UseSMPAssignKernel {
137  enum { value = evaluateVector || evaluateMatrix };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
151  IsFloat<typename T1::ElementType>::value &&
152  IsFloat<typename T2::ElementType>::value &&
153  IsFloat<typename T3::ElementType>::value };
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167  IsDouble<typename T1::ElementType>::value &&
168  IsDouble<typename T2::ElementType>::value &&
169  IsDouble<typename T3::ElementType>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184  IsSame<typename T1::ElementType,Type>::value &&
185  IsSame<typename T2::ElementType,Type>::value &&
186  IsSame<typename T3::ElementType,Type>::value };
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201  IsSame<typename T1::ElementType,Type>::value &&
202  IsSame<typename T2::ElementType,Type>::value &&
203  IsSame<typename T3::ElementType,Type>::value };
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234  IntrinsicTrait<typename T1::ElementType>::addition &&
235  IntrinsicTrait<typename T1::ElementType>::multiplication };
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
// Public typedefs of the expression.
// NOTE(review): ElementType and ResultType are used here, but their own typedefs are not part
// of this listing (the preceding source lines are absent) - confirm against the original header.
247  typedef const ElementType ReturnType;  // Type returned by operator[]
248  typedef const ResultType CompositeType;  // Type used when this expression is an operand
249 
// Type returned by leftOperand(): SelectType chooses between 'const VT' and 'const VT&'
// depending on IsExpression<VT>::value.
251  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
252 
// Type returned by rightOperand(): same selection scheme, applied to the matrix operand.
254  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
// Compilation flag for vectorized evaluation of this expression.
// NOTE(review): the initializer is cut off in this listing (it ends in '&&' and the enum is
// never closed); the missing continuation lines must be restored from the original header.
265  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
269 
// Compilation flag for SMP assignment: set only when neither operand has to be evaluated
// first (both evaluateVector and evaluateMatrix are zero).
271  enum { smpAssignable = !evaluateVector && !evaluateMatrix };
272  //**********************************************************************************************
273 
274  //**Constructor*********************************************************************************
280  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
281  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
282  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
283  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
284  {
285  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
286  }
287  //**********************************************************************************************
288 
289  //**Subscript operator**************************************************************************
295  inline ReturnType operator[]( size_t index ) const {
296  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
297 
298  ElementType res;
299 
300  if( mat_.rows() != 0UL ) {
301  res = vec_[0UL] * mat_(0UL,index);
302  for( size_t j=1UL; j<end_; j+=2UL ) {
303  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
304  }
305  if( end_ < mat_.rows() ) {
306  res += vec_[end_] * mat_(end_,index);
307  }
308  }
309  else {
310  reset( res );
311  }
312 
313  return res;
314  }
315  //**********************************************************************************************
316 
317  //**Size function*******************************************************************************
// Returns the size of the resulting vector, which is the number of columns of the matrix operand.
322  inline size_t size() const {
323  return mat_.columns();
324  }
325  //**********************************************************************************************
326 
327  //**Left operand access*************************************************************************
// Returns the left-hand side dense vector operand (vec_).
332  inline LeftOperand leftOperand() const {
333  return vec_;
334  }
335  //**********************************************************************************************
336 
337  //**Right operand access************************************************************************
// Returns the right-hand side dense matrix operand (mat_).
342  inline RightOperand rightOperand() const {
343  return mat_;
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
// Reports whether the expression can alias with the given address: true as soon as one of
// the two operands reports being aliased with 'alias'. The body is the same as isAliased().
353  template< typename T >
354  inline bool canAlias( const T* alias ) const {
355  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
// Reports whether the expression is aliased with the given address: true as soon as one of
// the two operands reports being aliased with 'alias'.
365  template< typename T >
366  inline bool isAliased( const T* alias ) const {
367  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
368  }
369  //**********************************************************************************************
370 
371  //**********************************************************************************************
// Returns true only if both the vector operand and the matrix operand report being aligned.
376  inline bool isAligned() const {
377  return vec_.isAligned() && mat_.isAligned();
378  }
379  //**********************************************************************************************
380 
381  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments.
// The visible part of the condition holds when BLAS is not parallel, or when the default
// kernel would be chosen anyway (matrix is a computation that is not evaluated, or the
// matrix has fewer elements than TDVECDMATMULT_THRESHOLD).
// NOTE(review): the return expression ends in '&&' and its last operand is missing from this
// listing; restore it from the original header.
386  inline bool canSMPAssign() const {
387  return ( !BLAZE_BLAS_IS_PARALLEL ||
388  ( IsComputation<MT>::value && !evaluateMatrix ) ||
389  ( mat_.rows() * mat_.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
391  }
392  //**********************************************************************************************
393 
394  private:
395  //**Member variables****************************************************************************
// End of the unrolled calculation loop in operator[]; set once by the constructor.
// NOTE(review): the declarations of vec_ and mat_, which are used throughout the class, are
// not part of this listing (the two preceding source lines are absent).
398  const size_t end_;
399  //**********************************************************************************************
400 
401  //**Assignment to dense vectors*****************************************************************
414  template< typename VT1 > // Type of the target dense vector
415  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
416  {
418 
419  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
420 
421  if( rhs.mat_.rows() == 0UL ) {
422  reset( ~lhs );
423  return;
424  }
425  else if( rhs.mat_.columns() == 0UL ) {
426  return;
427  }
428 
429  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
430  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
431 
432  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
433  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
434  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
435  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
436 
437  TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
438  }
440  //**********************************************************************************************
441 
442  //**Assignment to dense vectors (kernel selection)**********************************************
453  template< typename VT1 // Type of the left-hand side target vector
454  , typename VT2 // Type of the left-hand side vector operand
455  , typename MT1 > // Type of the right-hand side matrix operand
456  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
457  selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
461  TDVecDMatMultExpr::selectDefaultAssignKernel( y, x, A );
462  else
463  TDVecDMatMultExpr::selectBlasAssignKernel( y, x, A );
464  }
466  //**********************************************************************************************
467 
468  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment when UseSMPAssignKernel is true, i.e. at least one
// operand had to be evaluated: the product of the operands x and A is rebuilt as a new
// expression and handed to smpAssign().
479  template< typename VT1 // Type of the left-hand side target vector
480  , typename VT2 // Type of the left-hand side vector operand
481  , typename MT1 > // Type of the right-hand side matrix operand
482  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
483  selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
484  {
485  smpAssign( y, x * A );
486  }
488  //**********************************************************************************************
489 
490  //**Default assignment to dense vectors*********************************************************
504  template< typename VT1 // Type of the left-hand side target vector
505  , typename VT2 // Type of the left-hand side vector operand
506  , typename MT1 > // Type of the right-hand side matrix operand
507  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
508  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
509  {
510  const size_t M( A.rows() );
511  const size_t N( A.columns() );
512 
513  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
514  const size_t jend( N & size_t(-2) );
515 
516  for( size_t j=0UL; j<N; ++j ) {
517  y[j] = x[0UL] * A(0UL,j);
518  }
519  for( size_t i=1UL; i<M; ++i ) {
520  for( size_t j=0UL; j<jend; j+=2UL ) {
521  y[j ] += x[i] * A(i,j );
522  y[j+1UL] += x[i] * A(i,j+1UL);
523  }
524  if( jend < N ) {
525  y[jend] += x[i] * A(i,jend);
526  }
527  }
528  }
530  //**********************************************************************************************
531 
532  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for y = x * A, selected when UseVectorizedDefaultKernel is true.
//
// y: target vector, x: vector operand, A: matrix operand
//
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 SIMD vector(s) of IT::size
// elements. For each block the kernel walks all M rows, broadcasts x[i] with set(), adds
// x[i] * A.load(i, ...) to the block's accumulators and stores them into y afterwards.
// NOTE(review): the accumulators are default-constructed and added to immediately, so this
// relies on IntrinsicType's default constructor yielding zeros - confirm.
// NOTE(review): the IntrinsicType typedef is not part of the visible listing.
546  template< typename VT1 // Type of the left-hand side target vector
547  , typename VT2 // Type of the left-hand side vector operand
548  , typename MT1 > // Type of the right-hand side matrix operand
549  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
550  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
551  {
552  typedef IntrinsicTrait<ElementType> IT;
553 
554  const size_t M( A.rows() );
555  const size_t N( A.columns() );
556 
557  size_t j( 0UL );
558 
// Blocks of eight SIMD vectors
559  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
560  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
561  for( size_t i=0UL; i<M; ++i ) {
562  const IntrinsicType x1( set( x[i] ) );
563  xmm1 = xmm1 + x1 * A.load(i,j );
564  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
565  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
566  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
567  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
568  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
569  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
570  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
571  }
572  y.store( j , xmm1 );
573  y.store( j+IT::size , xmm2 );
574  y.store( j+IT::size*2UL, xmm3 );
575  y.store( j+IT::size*3UL, xmm4 );
576  y.store( j+IT::size*4UL, xmm5 );
577  y.store( j+IT::size*5UL, xmm6 );
578  y.store( j+IT::size*6UL, xmm7 );
579  y.store( j+IT::size*7UL, xmm8 );
580  }
// Blocks of four SIMD vectors
581  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
582  IntrinsicType xmm1, xmm2, xmm3, xmm4;
583  for( size_t i=0UL; i<M; ++i ) {
584  const IntrinsicType x1( set( x[i] ) );
585  xmm1 = xmm1 + x1 * A.load(i,j );
586  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
587  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
588  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
589  }
590  y.store( j , xmm1 );
591  y.store( j+IT::size , xmm2 );
592  y.store( j+IT::size*2UL, xmm3 );
593  y.store( j+IT::size*3UL, xmm4 );
594  }
// Blocks of three SIMD vectors
595  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
596  IntrinsicType xmm1, xmm2, xmm3;
597  for( size_t i=0UL; i<M; ++i ) {
598  const IntrinsicType x1( set( x[i] ) );
599  xmm1 = xmm1 + x1 * A.load(i,j );
600  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
601  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
602  }
603  y.store( j , xmm1 );
604  y.store( j+IT::size , xmm2 );
605  y.store( j+IT::size*2UL, xmm3 );
606  }
// Blocks of two SIMD vectors
607  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
608  IntrinsicType xmm1, xmm2;
609  for( size_t i=0UL; i<M; ++i ) {
610  const IntrinsicType x1( set( x[i] ) );
611  xmm1 = xmm1 + x1 * A.load(i,j );
612  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
613  }
614  y.store( j , xmm1 );
615  y.store( j+IT::size, xmm2 );
616  }
// Final block: at most IT::size columns are left, yet a complete SIMD vector is loaded and
// stored. Presumably this relies on padded storage of A and y - confirm.
617  if( j < N ) {
618  IntrinsicType xmm1;
619  for( size_t i=0UL; i<M; ++i ) {
620  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
621  }
622  y.store( j, xmm1 );
623  }
624  }
626  //**********************************************************************************************
627 
628  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback of the BLAS kernel selection: chosen when UseDefaultKernel is true (BLAS mode is
// off or no type-specific BLAS kernel applies) and simply forwards to the default kernel.
642  template< typename VT1 // Type of the left-hand side target vector
643  , typename VT2 // Type of the left-hand side vector operand
644  , typename MT1 > // Type of the right-hand side matrix operand
645  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
646  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
647  {
648  selectDefaultAssignKernel( y, x, A );
649  }
651  //**********************************************************************************************
652 
653  //**BLAS-based assignment to dense vectors (single precision)***********************************
654 #if BLAZE_BLAS_MODE
655 
// Single precision BLAS kernel for the assignment, selected by UseSinglePrecisionKernel.
// Calls cblas_sgemv on the row-major matrix with CblasTrans, alpha = 1 and beta = 0, so the
// previous content of y does not contribute to the result.
// NOTE(review): the listing skips several source lines right after the using-declaration;
// whatever stood there (presumably compile-time checks) is not visible - confirm.
668  template< typename VT1 // Type of the left-hand side target vector
669  , typename VT2 // Type of the left-hand side vector operand
670  , typename MT1 > // Type of the right-hand side matrix operand
671  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
672  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
673  {
674  using boost::numeric_cast;
675 
679 
// BLAS takes int dimensions; the sizes are converted through numeric_cast
680  const int M ( numeric_cast<int>( A.rows() ) );
681  const int N ( numeric_cast<int>( A.columns() ) );
// Leading dimension of A as reported by spacing()
682  const int lda( numeric_cast<int>( A.spacing() ) );
683 
684  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
685  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
686  }
688 #endif
689  //**********************************************************************************************
690 
691  //**BLAS-based assignment to dense vectors (double precision)***********************************
692 #if BLAZE_BLAS_MODE
693 
// Double precision BLAS kernel for the assignment, selected by UseDoublePrecisionKernel.
// Calls cblas_dgemv on the row-major matrix with CblasTrans, alpha = 1 and beta = 0, so the
// previous content of y does not contribute to the result.
// NOTE(review): the listing skips several source lines right after the using-declaration;
// whatever stood there (presumably compile-time checks) is not visible - confirm.
706  template< typename VT1 // Type of the left-hand side target vector
707  , typename VT2 // Type of the left-hand side vector operand
708  , typename MT1 > // Type of the right-hand side matrix operand
709  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
710  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
711  {
712  using boost::numeric_cast;
713 
717 
// BLAS takes int dimensions; the sizes are converted through numeric_cast
718  const int M ( numeric_cast<int>( A.rows() ) );
719  const int N ( numeric_cast<int>( A.columns() ) );
// Leading dimension of A as reported by spacing()
720  const int lda( numeric_cast<int>( A.spacing() ) );
721 
722  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
723  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
724  }
726 #endif
727  //**********************************************************************************************
728 
729  //**BLAS-based assignment to dense vectors (single precision complex)***************************
730 #if BLAZE_BLAS_MODE
731 
// Single precision complex BLAS kernel for the assignment, selected by
// UseSinglePrecisionComplexKernel. Calls cblas_cgemv on the row-major matrix with CblasTrans,
// alpha = (1,0) and beta = (0,0), so the previous content of y does not contribute.
// NOTE(review): the listing skips a few source lines right after the using-declaration.
744  template< typename VT1 // Type of the left-hand side target vector
745  , typename VT2 // Type of the left-hand side vector operand
746  , typename MT1 > // Type of the right-hand side matrix operand
747  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
748  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
749  {
750  using boost::numeric_cast;
751 
// Compile-time checks: the value_type of every complex element type must be float
755  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
756  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
757  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
758 
// BLAS takes int dimensions; the sizes are converted through numeric_cast
759  const int M ( numeric_cast<int>( A.rows() ) );
760  const int N ( numeric_cast<int>( A.columns() ) );
761  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to BLAS by address
762  const complex<float> alpha( 1.0F, 0.0F );
763  const complex<float> beta ( 0.0F, 0.0F );
764 
765  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
766  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
767  }
769 #endif
770  //**********************************************************************************************
771 
772  //**BLAS-based assignment to dense vectors (double precision complex)***************************
773 #if BLAZE_BLAS_MODE
774 
// Double precision complex BLAS kernel for the assignment, selected by
// UseDoublePrecisionComplexKernel. Calls cblas_zgemv on the row-major matrix with CblasTrans,
// alpha = (1,0) and beta = (0,0), so the previous content of y does not contribute.
// NOTE(review): the listing skips a few source lines right after the using-declaration.
787  template< typename VT1 // Type of the left-hand side target vector
788  , typename VT2 // Type of the left-hand side vector operand
789  , typename MT1 > // Type of the right-hand side matrix operand
790  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
791  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
792  {
793  using boost::numeric_cast;
794 
// Compile-time checks: the value_type of every complex element type must be double
798  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
799  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
800  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
801 
// BLAS takes int dimensions; the sizes are converted through numeric_cast
802  const int M ( numeric_cast<int>( A.rows() ) );
803  const int N ( numeric_cast<int>( A.columns() ) );
804  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to BLAS by address
805  const complex<double> alpha( 1.0, 0.0 );
806  const complex<double> beta ( 0.0, 0.0 );
807 
808  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
809  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
810  }
812 #endif
813  //**********************************************************************************************
814 
815  //**Assignment to sparse vectors****************************************************************
// Assignment of the vector/matrix product to a sparse vector: the expression is first
// evaluated into a temporary of type ResultType, which is then handed to smpAssign().
// NOTE(review): the listing skips source lines at the top of the body; their content is not
// visible here.
828  template< typename VT1 > // Type of the target sparse vector
829  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
830  {
832 
836 
837  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
838 
839  const ResultType tmp( rhs );
840  smpAssign( ~lhs, tmp );
841  }
843  //**********************************************************************************************
844 
845  //**Addition assignment to dense vectors********************************************************
858  template< typename VT1 > // Type of the target dense vector
859  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
860  {
862 
863  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
864 
865  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
866  return;
867  }
868 
869  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
870  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
871 
872  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
873  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
874  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
875  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
876 
877  TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
878  }
880  //**********************************************************************************************
881 
882  //**Addition assignment to dense vectors (kernel selection)*************************************
893  template< typename VT1 // Type of the left-hand side target vector
894  , typename VT2 // Type of the left-hand side vector operand
895  , typename MT1 > // Type of the right-hand side matrix operand
896  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
897  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
898  {
899  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
900  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
901  TDVecDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
902  else
903  TDVecDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
904  }
906  //**********************************************************************************************
907 
908  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment when UseSMPAssignKernel is true, i.e. at
// least one operand had to be evaluated: the product of the operands x and A is rebuilt as a
// new expression and handed to smpAddAssign().
919  template< typename VT1 // Type of the left-hand side target vector
920  , typename VT2 // Type of the left-hand side vector operand
921  , typename MT1 > // Type of the right-hand side matrix operand
922  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
923  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
924  {
925  smpAddAssign( y, x * A );
926  }
928  //**********************************************************************************************
929 
930  //**Default addition assignment to dense vectors************************************************
944  template< typename VT1 // Type of the left-hand side target vector
945  , typename VT2 // Type of the left-hand side vector operand
946  , typename MT1 > // Type of the right-hand side matrix operand
947  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
948  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
949  {
950  const size_t M( A.rows() );
951  const size_t N( A.columns() );
952 
953  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
954  const size_t jend( N & size_t(-2) );
955 
956  for( size_t i=0UL; i<M; ++i ) {
957  for( size_t j=0UL; j<jend; j+=2UL ) {
958  y[j ] += x[i] * A(i,j );
959  y[j+1UL] += x[i] * A(i,j+1UL);
960  }
961  if( jend < N ) {
962  y[jend] += x[i] * A(i,jend);
963  }
964  }
965  }
967  //**********************************************************************************************
968 
969  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for y += x * A, selected when UseVectorizedDefaultKernel is true.
//
// y: target vector, x: vector operand, A: matrix operand
//
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 SIMD vector(s) of IT::size
// elements. For each block the accumulators are preloaded with the current content of y,
// then all M rows are walked, adding set( x[i] ) * A.load(i, ...), and the result is stored
// back into y.
// NOTE(review): the IntrinsicType typedef is not part of the visible listing.
983  template< typename VT1 // Type of the left-hand side target vector
984  , typename VT2 // Type of the left-hand side vector operand
985  , typename MT1 > // Type of the right-hand side matrix operand
986  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
987  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
988  {
989  typedef IntrinsicTrait<ElementType> IT;
990 
991  const size_t M( A.rows() );
992  const size_t N( A.columns() );
993 
994  size_t j( 0UL );
995 
// Blocks of eight SIMD vectors
996  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
997  IntrinsicType xmm1( y.load(j ) );
998  IntrinsicType xmm2( y.load(j+IT::size ) );
999  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1000  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1001  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1002  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1003  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1004  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1005  for( size_t i=0UL; i<M; ++i ) {
1006  const IntrinsicType x1( set( x[i] ) );
1007  xmm1 = xmm1 + x1 * A.load(i,j );
1008  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1009  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1010  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1011  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1012  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1013  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1014  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1015  }
1016  y.store( j , xmm1 );
1017  y.store( j+IT::size , xmm2 );
1018  y.store( j+IT::size*2UL, xmm3 );
1019  y.store( j+IT::size*3UL, xmm4 );
1020  y.store( j+IT::size*4UL, xmm5 );
1021  y.store( j+IT::size*5UL, xmm6 );
1022  y.store( j+IT::size*6UL, xmm7 );
1023  y.store( j+IT::size*7UL, xmm8 );
1024  }
// Blocks of four SIMD vectors
1025  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1026  IntrinsicType xmm1( y.load(j ) );
1027  IntrinsicType xmm2( y.load(j+IT::size ) );
1028  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1029  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1030  for( size_t i=0UL; i<M; ++i ) {
1031  const IntrinsicType x1( set( x[i] ) );
1032  xmm1 = xmm1 + x1 * A.load(i,j );
1033  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1034  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1035  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1036  }
1037  y.store( j , xmm1 );
1038  y.store( j+IT::size , xmm2 );
1039  y.store( j+IT::size*2UL, xmm3 );
1040  y.store( j+IT::size*3UL, xmm4 );
1041  }
// Blocks of three SIMD vectors
1042  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1043  IntrinsicType xmm1( y.load(j ) );
1044  IntrinsicType xmm2( y.load(j+IT::size ) );
1045  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1046  for( size_t i=0UL; i<M; ++i ) {
1047  const IntrinsicType x1( set( x[i] ) );
1048  xmm1 = xmm1 + x1 * A.load(i,j );
1049  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1050  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1051  }
1052  y.store( j , xmm1 );
1053  y.store( j+IT::size , xmm2 );
1054  y.store( j+IT::size*2UL, xmm3 );
1055  }
// Blocks of two SIMD vectors
1056  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1057  IntrinsicType xmm1( y.load(j ) );
1058  IntrinsicType xmm2( y.load(j+IT::size) );
1059  for( size_t i=0UL; i<M; ++i ) {
1060  const IntrinsicType x1( set( x[i] ) );
1061  xmm1 = xmm1 + x1 * A.load(i,j );
1062  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1063  }
1064  y.store( j , xmm1 );
1065  y.store( j+IT::size, xmm2 );
1066  }
// Final block: at most IT::size columns are left, yet a complete SIMD vector is loaded and
// stored. Presumably this relies on padded storage of A and y - confirm.
1067  if( j < N ) {
1068  IntrinsicType xmm1( y.load(j) );
1069  for( size_t i=0UL; i<M; ++i ) {
1070  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1071  }
1072  y.store( j, xmm1 );
1073  }
1074  }
1076  //**********************************************************************************************
1077 
1078  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback of the BLAS kernel selection for the addition assignment: chosen when
// UseDefaultKernel is true (BLAS mode is off or no type-specific BLAS kernel applies) and
// simply forwards to the default addition assignment kernel.
1092  template< typename VT1 // Type of the left-hand side target vector
1093  , typename VT2 // Type of the left-hand side vector operand
1094  , typename MT1 > // Type of the right-hand side matrix operand
1095  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1096  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1097  {
1098  selectDefaultAddAssignKernel( y, x, A );
1099  }
1101  //**********************************************************************************************
1102 
1103  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1104 #if BLAZE_BLAS_MODE
1105 
1118  template< typename VT1 // Type of the left-hand side target vector
1119  , typename VT2 // Type of the left-hand side vector operand
1120  , typename MT1 > // Type of the right-hand side matrix operand
1121  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1122  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1123  {
1124  using boost::numeric_cast;
1125 
1129 
1130  const int M ( numeric_cast<int>( A.rows() ) );
1131  const int N ( numeric_cast<int>( A.columns() ) );
1132  const int lda( numeric_cast<int>( A.spacing() ) );
1133 
1134  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
1135  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1136  }
1138 #endif
1139  //**********************************************************************************************
1140 
1141  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1142 #if BLAZE_BLAS_MODE
1143 
1156  template< typename VT1 // Type of the left-hand side target vector
1157  , typename VT2 // Type of the left-hand side vector operand
1158  , typename MT1 > // Type of the right-hand side matrix operand
1159  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1160  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1161  {
1162  using boost::numeric_cast;
1163 
1167 
1168  const int M ( numeric_cast<int>( A.rows() ) );
1169  const int N ( numeric_cast<int>( A.columns() ) );
1170  const int lda( numeric_cast<int>( A.spacing() ) );
1171 
1172  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1173  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1174  }
1176 #endif
1177  //**********************************************************************************************
1178 
1179  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1180 #if BLAZE_BLAS_MODE
1181 
1194  template< typename VT1 // Type of the left-hand side target vector
1195  , typename VT2 // Type of the left-hand side vector operand
1196  , typename MT1 > // Type of the right-hand side matrix operand
1197  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1198  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1199  {
1200  using boost::numeric_cast;
1201 
1205  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1206  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1207  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1208 
1209  const int M ( numeric_cast<int>( A.rows() ) );
1210  const int N ( numeric_cast<int>( A.columns() ) );
1211  const int lda( numeric_cast<int>( A.spacing() ) );
1212  const complex<float> alpha( 1.0F, 0.0F );
1213  const complex<float> beta ( 1.0F, 0.0F );
1214 
1215  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1216  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1217  }
1219 #endif
1220  //**********************************************************************************************
1221 
1222  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1223 #if BLAZE_BLAS_MODE
1224 
1237  template< typename VT1 // Type of the left-hand side target vector
1238  , typename VT2 // Type of the left-hand side vector operand
1239  , typename MT1 > // Type of the right-hand side matrix operand
1240  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1241  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1242  {
1243  using boost::numeric_cast;
1244 
1248  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1249  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1250  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1251 
1252  const int M ( numeric_cast<int>( A.rows() ) );
1253  const int N ( numeric_cast<int>( A.columns() ) );
1254  const int lda( numeric_cast<int>( A.spacing() ) );
1255  const complex<double> alpha( 1.0, 0.0 );
1256  const complex<double> beta ( 1.0, 0.0 );
1257 
1258  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1259  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1260  }
1262 #endif
1263  //**********************************************************************************************
1264 
1265  //**Addition assignment to sparse vectors*******************************************************
1266  // No special implementation for the addition assignment to sparse vectors.
1267  //**********************************************************************************************
1268 
1269  //**Subtraction assignment to dense vectors*****************************************************
1282  template< typename VT1 > // Type of the target dense vector
1283  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1284  {
1286 
1287  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1288 
1289  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1290  return;
1291  }
1292 
1293  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1294  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1295 
1296  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1297  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1298  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1299  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1300 
1301  TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1302  }
1304  //**********************************************************************************************
1305 
1306  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1317  template< typename VT1 // Type of the left-hand side target vector
1318  , typename VT2 // Type of the left-hand side vector operand
1319  , typename MT1 > // Type of the right-hand side matrix operand
1320  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
1321  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1322  {
1323  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1324  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1325  TDVecDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1326  else
1327  TDVecDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
1328  }
1330  //**********************************************************************************************
1331 
1332  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1343  template< typename VT1 // Type of the left-hand side target vector
1344  , typename VT2 // Type of the left-hand side vector operand
1345  , typename MT1 > // Type of the right-hand side matrix operand
1346  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1> >::Type
1347  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1348  {
1349  smpSubAssign( y, x * A );
1350  }
1352  //**********************************************************************************************
1353 
1354  //**Default subtraction assignment to dense vectors*********************************************
1368  template< typename VT1 // Type of the left-hand side target vector
1369  , typename VT2 // Type of the left-hand side vector operand
1370  , typename MT1 > // Type of the right-hand side matrix operand
1371  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1372  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1373  {
1374  const size_t M( A.rows() );
1375  const size_t N( A.columns() );
1376 
1377  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1378  const size_t jend( N & size_t(-2) );
1379 
1380  for( size_t i=0UL; i<M; ++i ) {
1381  for( size_t j=0UL; j<jend; j+=2UL ) {
1382  y[j ] -= x[i] * A(i,j );
1383  y[j+1UL] -= x[i] * A(i,j+1UL);
1384  }
1385  if( jend < N ) {
1386  y[jend] -= x[i] * A(i,jend);
1387  }
1388  }
1389  }
1391  //**********************************************************************************************
1392 
1393  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel of the subtraction assignment.
//
// Active when UseVectorizedDefaultKernel<VT1,VT2,MT1> is set. The columns of A
// are processed in groups of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements each. For every group the current values of y are loaded
// into local accumulators, x[i] * A(i,j...) is subtracted for all rows i, and
// the accumulators are stored back to y.
//
//   y : target vector that is updated in place
//   x : left-hand side vector operand
//   A : right-hand side matrix operand
//
// NOTE(review): every loop condition only checks that the START of the last
// intrinsic vector of a group lies below N, so loads/stores may reach past
// column N-1. This presumably relies on y and the rows of A being padded to a
// multiple of IT::size - confirm against the container implementations.
1407  template< typename VT1 // Type of the left-hand side target vector
1408  , typename VT2 // Type of the left-hand side vector operand
1409  , typename MT1 > // Type of the right-hand side matrix operand
1410  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1411  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1412  {
1413  typedef IntrinsicTrait<ElementType> IT;
1414 
1415  const size_t M( A.rows() );
1416  const size_t N( A.columns() );
1417 
// Column index shared by all loops below; each loop continues where the
// previous one stopped.
1418  size_t j( 0UL );
1419 
// Groups of eight intrinsic vectors
1420  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1421  IntrinsicType xmm1( y.load(j ) );
1422  IntrinsicType xmm2( y.load(j+IT::size ) );
1423  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1424  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1425  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1426  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1427  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1428  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1429  for( size_t i=0UL; i<M; ++i ) {
// set() presumably replicates the scalar x[i] into every lane - TODO confirm
1430  const IntrinsicType x1( set( x[i] ) );
1431  xmm1 = xmm1 - x1 * A.load(i,j );
1432  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1433  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1434  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1435  xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1436  xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1437  xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1438  xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1439  }
1440  y.store( j , xmm1 );
1441  y.store( j+IT::size , xmm2 );
1442  y.store( j+IT::size*2UL, xmm3 );
1443  y.store( j+IT::size*3UL, xmm4 );
1444  y.store( j+IT::size*4UL, xmm5 );
1445  y.store( j+IT::size*5UL, xmm6 );
1446  y.store( j+IT::size*6UL, xmm7 );
1447  y.store( j+IT::size*7UL, xmm8 );
1448  }
// Groups of four intrinsic vectors
1449  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1450  IntrinsicType xmm1( y.load(j ) );
1451  IntrinsicType xmm2( y.load(j+IT::size ) );
1452  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1453  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1454  for( size_t i=0UL; i<M; ++i ) {
1455  const IntrinsicType x1( set( x[i] ) );
1456  xmm1 = xmm1 - x1 * A.load(i,j );
1457  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1458  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1459  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1460  }
1461  y.store( j , xmm1 );
1462  y.store( j+IT::size , xmm2 );
1463  y.store( j+IT::size*2UL, xmm3 );
1464  y.store( j+IT::size*3UL, xmm4 );
1465  }
// Groups of three intrinsic vectors
1466  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1467  IntrinsicType xmm1( y.load(j ) );
1468  IntrinsicType xmm2( y.load(j+IT::size ) );
1469  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1470  for( size_t i=0UL; i<M; ++i ) {
1471  const IntrinsicType x1( set( x[i] ) );
1472  xmm1 = xmm1 - x1 * A.load(i,j );
1473  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1474  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1475  }
1476  y.store( j , xmm1 );
1477  y.store( j+IT::size , xmm2 );
1478  y.store( j+IT::size*2UL, xmm3 );
1479  }
// Groups of two intrinsic vectors
1480  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1481  IntrinsicType xmm1( y.load(j ) );
1482  IntrinsicType xmm2( y.load(j+IT::size) );
1483  for( size_t i=0UL; i<M; ++i ) {
1484  const IntrinsicType x1( set( x[i] ) );
1485  xmm1 = xmm1 - x1 * A.load(i,j );
1486  xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
1487  }
1488  y.store( j , xmm1 );
1489  y.store( j+IT::size, xmm2 );
1490  }
// At most one intrinsic vector is left at this point
1491  if( j < N ) {
1492  IntrinsicType xmm1( y.load(j) );
1493  for( size_t i=0UL; i<M; ++i ) {
1494  xmm1 = xmm1 - set( x[i] ) * A.load(i,j);
1495  }
1496  y.store( j, xmm1 );
1497  }
1498  }
1500  //**********************************************************************************************
1501 
1502  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1516  template< typename VT1 // Type of the left-hand side target vector
1517  , typename VT2 // Type of the left-hand side vector operand
1518  , typename MT1 > // Type of the right-hand side matrix operand
1519  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1520  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1521  {
1522  selectDefaultSubAssignKernel( y, x, A );
1523  }
1525  //**********************************************************************************************
1526 
1527  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1528 #if BLAZE_BLAS_MODE
1529 
1542  template< typename VT1 // Type of the left-hand side target vector
1543  , typename VT2 // Type of the left-hand side vector operand
1544  , typename MT1 > // Type of the right-hand side matrix operand
1545  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1546  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1547  {
1548  using boost::numeric_cast;
1549 
1553 
1554  const int M ( numeric_cast<int>( A.rows() ) );
1555  const int N ( numeric_cast<int>( A.columns() ) );
1556  const int lda( numeric_cast<int>( A.spacing() ) );
1557 
1558  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1559  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1560  }
1562 #endif
1563  //**********************************************************************************************
1564 
1565  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1566 #if BLAZE_BLAS_MODE
1567 
1580  template< typename VT1 // Type of the left-hand side target vector
1581  , typename VT2 // Type of the left-hand side vector operand
1582  , typename MT1 > // Type of the right-hand side matrix operand
1583  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1584  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1585  {
1586  using boost::numeric_cast;
1587 
1591 
1592  const int M ( numeric_cast<int>( A.rows() ) );
1593  const int N ( numeric_cast<int>( A.columns() ) );
1594  const int lda( numeric_cast<int>( A.spacing() ) );
1595 
1596  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1597  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1598  }
1600 #endif
1601  //**********************************************************************************************
1602 
1603  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1604 #if BLAZE_BLAS_MODE
1605 
1618  template< typename VT1 // Type of the left-hand side target vector
1619  , typename VT2 // Type of the left-hand side vector operand
1620  , typename MT1 > // Type of the right-hand side matrix operand
1621  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1622  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1623  {
1624  using boost::numeric_cast;
1625 
1629  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1630  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1631  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1632 
1633  const int M ( numeric_cast<int>( A.rows() ) );
1634  const int N ( numeric_cast<int>( A.columns() ) );
1635  const int lda( numeric_cast<int>( A.spacing() ) );
1636  const complex<float> alpha( -1.0F, 0.0F );
1637  const complex<float> beta ( 1.0F, 0.0F );
1638 
1639  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1640  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1641  }
1643 #endif
1644  //**********************************************************************************************
1645 
1646  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1647 #if BLAZE_BLAS_MODE
1648 
1661  template< typename VT1 // Type of the left-hand side target vector
1662  , typename VT2 // Type of the left-hand side vector operand
1663  , typename MT1 > // Type of the right-hand side matrix operand
1664  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1665  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1666  {
1667  using boost::numeric_cast;
1668 
1672  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1673  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1674  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1675 
1676  const int M ( numeric_cast<int>( A.rows() ) );
1677  const int N ( numeric_cast<int>( A.columns() ) );
1678  const int lda( numeric_cast<int>( A.spacing() ) );
1679  const complex<double> alpha( -1.0, 0.0 );
1680  const complex<double> beta ( 1.0, 0.0 );
1681 
1682  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1683  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1684  }
1686 #endif
1687  //**********************************************************************************************
1688 
1689  //**Subtraction assignment to sparse vectors****************************************************
1690  // No special implementation for the subtraction assignment to sparse vectors.
1691  //**********************************************************************************************
1692 
1693  //**Multiplication assignment to dense vectors**************************************************
1706  template< typename VT1 > // Type of the target dense vector
1707  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1708  {
1710 
1714 
1715  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1716 
1717  const ResultType tmp( rhs );
1718  multAssign( ~lhs, tmp );
1719  }
1721  //**********************************************************************************************
1722 
1723  //**Multiplication assignment to sparse vectors*************************************************
1724  // No special implementation for the multiplication assignment to sparse vectors.
1725  //**********************************************************************************************
1726 
1727  //**Compile time checks*************************************************************************
1734  //**********************************************************************************************
1735 };
1736 //*************************************************************************************************
1737 
1738 
1739 
1740 
1741 //=================================================================================================
1742 //
1743 // DVECSCALARMULTEXPR SPECIALIZATION
1744 //
1745 //=================================================================================================
1746 
1747 //*************************************************************************************************
1755 template< typename VT // Type of the left-hand side dense vector
1756  , typename MT // Type of the right-hand side dense matrix
1757  , typename ST > // Type of the side scalar value
1758 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1759  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1760  , private VecScalarMultExpr
1761  , private Computation
1762 {
1763  private:
1764  //**Type definitions****************************************************************************
// Shorthand names for the types derived from the vector type VT and the
// matrix type MT of the wrapped multiplication expression.
1765  typedef TDVecDMatMultExpr<VT,MT> VMM; // Type of the scaled vector/matrix multiplication expression
1766  typedef typename VMM::ResultType RES; // Result type of the vector/matrix multiplication expression
1767  typedef typename VT::ResultType VRT; // Result type of the vector operand type
1768  typedef typename MT::ResultType MRT; // Result type of the matrix operand type
1769  typedef typename VRT::ElementType VET; // Element type of the vector result type
1770  typedef typename MRT::ElementType MET; // Element type of the matrix result type
1771  typedef typename VT::CompositeType VCT; // Composite type of the vector operand type
1772  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand type
1773  //**********************************************************************************************
1774 
1775  //**********************************************************************************************
// Compile time flag for the vector operand: set if IsComputation<VT> or
// RequiresEvaluation<VT> is set. It selects between the result type and the
// composite type of the vector in the LT typedef of this class.
1777  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1778  //**********************************************************************************************
1779 
1780  //**********************************************************************************************
// Compile time flag for the matrix operand: set if RequiresEvaluation<MT> is
// set, or if MT is a computation whose element type equals the vector element
// type and is BLAS compatible. It selects between the result type and the
// composite type of the matrix in the RT typedef of this class.
1782  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1783  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1784  //**********************************************************************************************
1785 
1786  //**********************************************************************************************
1788 
1790  template< typename T1, typename T2, typename T3, typename T4 >
1791  struct UseSMPAssignKernel {
1792  enum { value = evaluateVector || evaluateMatrix };
1793  };
1794  //**********************************************************************************************
1795 
1796  //**********************************************************************************************
1798 
1801  template< typename T1, typename T2, typename T3, typename T4 >
1802  struct UseSinglePrecisionKernel {
1803  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1804  IsFloat<typename T1::ElementType>::value &&
1805  IsFloat<typename T2::ElementType>::value &&
1806  IsFloat<typename T3::ElementType>::value &&
1807  !IsComplex<T4>::value };
1808  };
1809  //**********************************************************************************************
1810 
1811  //**********************************************************************************************
1813 
1816  template< typename T1, typename T2, typename T3, typename T4 >
1817  struct UseDoublePrecisionKernel {
1818  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1819  IsDouble<typename T1::ElementType>::value &&
1820  IsDouble<typename T2::ElementType>::value &&
1821  IsDouble<typename T3::ElementType>::value &&
1822  !IsComplex<T4>::value };
1823  };
1824  //**********************************************************************************************
1825 
1826  //**********************************************************************************************
1828 
1831  template< typename T1, typename T2, typename T3 >
1832  struct UseSinglePrecisionComplexKernel {
1833  typedef complex<float> Type;
1834  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1835  IsSame<typename T1::ElementType,Type>::value &&
1836  IsSame<typename T2::ElementType,Type>::value &&
1837  IsSame<typename T3::ElementType,Type>::value };
1838  };
1839  //**********************************************************************************************
1840 
1841  //**********************************************************************************************
1843 
1846  template< typename T1, typename T2, typename T3 >
1847  struct UseDoublePrecisionComplexKernel {
1848  typedef complex<double> Type;
1849  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1850  IsSame<typename T1::ElementType,Type>::value &&
1851  IsSame<typename T2::ElementType,Type>::value &&
1852  IsSame<typename T3::ElementType,Type>::value };
1853  };
1854  //**********************************************************************************************
1855 
1856  //**********************************************************************************************
1858 
1860  template< typename T1, typename T2, typename T3, typename T4 >
1861  struct UseDefaultKernel {
1862  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1863  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1864  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1865  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1866  };
1867  //**********************************************************************************************
1868 
1869  //**********************************************************************************************
1871 
1874  template< typename T1, typename T2, typename T3, typename T4 >
1875  struct UseVectorizedDefaultKernel {
1876  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1877  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1878  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1879  IsSame<typename T1::ElementType,T4>::value &&
1880  IntrinsicTrait<typename T1::ElementType>::addition &&
1881  IntrinsicTrait<typename T1::ElementType>::multiplication };
1882  };
1883  //**********************************************************************************************
1884 
1885  public:
1886  //**Type definitions****************************************************************************
// Public type interface of the scaled vector/matrix multiplication expression.
1887  typedef DVecScalarMultExpr<VMM,ST,true> This; // Type of this expression instance
1888  typedef typename MultTrait<RES,ST>::Type ResultType; // MultTrait result of the product result type and the scalar type
1889  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type
1890  typedef typename ResultType::ElementType ElementType; // Element type of the result type
1891  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type associated with the element type
1892  typedef const ElementType ReturnType; // Return type of the subscript operator
1893  typedef const ResultType CompositeType; // Composite type of this expression: a constant result type object
1894 
// Type of the left-hand side operand: the wrapped vector/matrix multiplication
1896  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
1897 
// Type of the right-hand side operand: the scalar
1899  typedef ST RightOperand;
1900 
// Type used for the vector operand inside assign(): the result type if
// evaluateVector is set, the composite type otherwise
1902  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
1903 
// Type used for the matrix operand inside assign(): the result type if
// evaluateMatrix is set, the composite type otherwise
1905  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
1906  //**********************************************************************************************
1907 
1908  //**Compilation flags***************************************************************************
// Vectorization flag: set if both operand types are vectorizable, the vector
// element type, the matrix element type and the scalar type are identical,
// and IntrinsicTrait reports addition and multiplication for that type.
1910  enum { vectorizable = VT::vectorizable && MT::vectorizable &&
1911  IsSame<VET,MET>::value &&
1912  IsSame<VET,ST>::value &&
1913  IntrinsicTrait<VET>::addition &&
1914  IntrinsicTrait<VET>::multiplication };
1915 
// SMP flag: set only if neither the vector nor the matrix operand has to be
// evaluated (exact negation of the UseSMPAssignKernel condition).
1917  enum { smpAssignable = !evaluateVector && !evaluateMatrix };
1918  //**********************************************************************************************
1919 
1920  //**Constructor*********************************************************************************
1926  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1927  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1928  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1929  {}
1930  //**********************************************************************************************
1931 
1932  //**Subscript operator**************************************************************************
1938  inline ReturnType operator[]( size_t index ) const {
1939  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1940  return vector_[index] * scalar_;
1941  }
1942  //**********************************************************************************************
1943 
1944  //**Size function*******************************************************************************
1949  inline size_t size() const {
1950  return vector_.size();
1951  }
1952  //**********************************************************************************************
1953 
1954  //**Left operand access*************************************************************************
1959  inline LeftOperand leftOperand() const {
1960  return vector_;
1961  }
1962  //**********************************************************************************************
1963 
1964  //**Right operand access************************************************************************
1969  inline RightOperand rightOperand() const {
1970  return scalar_;
1971  }
1972  //**********************************************************************************************
1973 
1974  //**********************************************************************************************
1980  template< typename T >
1981  inline bool canAlias( const T* alias ) const {
1982  return vector_.canAlias( alias );
1983  }
1984  //**********************************************************************************************
1985 
1986  //**********************************************************************************************
1992  template< typename T >
1993  inline bool isAliased( const T* alias ) const {
1994  return vector_.isAliased( alias );
1995  }
1996  //**********************************************************************************************
1997 
1998  //**********************************************************************************************
2003  inline bool isAligned() const {
2004  return vector_.isAligned();
2005  }
2006  //**********************************************************************************************
2007 
2008  //**********************************************************************************************
// SMP assignment query. The visible part of the condition is true if BLAS is
// not parallel, or the matrix operand type is a computation that is not
// evaluated, or the element count of the matrix operand (rows * columns) is
// below TDVECDMATMULT_THRESHOLD.
//
// NOTE(review): the return expression ends in a dangling '&&' and the embedded
// listing numbering jumps from 2017 to 2019, so the final operand of the
// conjunction appears to have been lost in this copy of the file. Restore the
// missing line from the upstream header before compiling; it is deliberately
// not reconstructed here.
2013  inline bool canSMPAssign() const {
2014  typename VMM::RightOperand A( vector_.rightOperand() );
2015  return ( !BLAZE_BLAS_IS_PARALLEL ||
2016  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2017  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
2019  }
2020  //**********************************************************************************************
2021 
2022  private:
2023  //**Member variables****************************************************************************
2024  LeftOperand vector_; // Left-hand side operand: the vector/matrix multiplication expression
2025  RightOperand scalar_; // Right-hand side operand: the scalar of the multiplication
2026  //**********************************************************************************************
2027 
2028  //**Assignment to dense vectors*****************************************************************
2040  template< typename VT1 > // Type of the target dense vector
2041  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2042  {
2044 
2045  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2046 
2047  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2048  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2049 
2050  if( right.rows() == 0UL ) {
2051  reset( ~lhs );
2052  return;
2053  }
2054  else if( right.columns() == 0UL ) {
2055  return;
2056  }
2057 
2058  LT x( left ); // Evaluation of the left-hand side dense vector operand
2059  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2060 
2061  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2062  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2063  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2064  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2065 
2066  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
2067  }
2068  //**********************************************************************************************
2069 
2070  //**Assignment to dense vectors (kernel selection)**********************************************
2081  template< typename VT1 // Type of the left-hand side target vector
2082  , typename VT2 // Type of the left-hand side vector operand
2083  , typename MT1 // Type of the right-hand side matrix operand
2084  , typename ST2 > // Type of the scalar value
2085  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2086  selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2087  {
2088  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2089  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2090  DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2091  else
2092  DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
2093  }
2094  //**********************************************************************************************
2095 
2096  //**Assignment to dense vectors (kernel selection)**********************************************
2107  template< typename VT1 // Type of the left-hand side target vector
2108  , typename VT2 // Type of the left-hand side vector operand
2109  , typename MT1 // Type of the right-hand side matrix operand
2110  , typename ST2 > // Type of the scalar value
2111  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2112  selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2113  {
2114  smpAssign( y, x * A * scalar );
2115  }
2116  //**********************************************************************************************
2117 
2118  //**Default assignment to dense vectors*********************************************************
2132  template< typename VT1 // Type of the left-hand side target vector
2133  , typename VT2 // Type of the left-hand side vector operand
2134  , typename MT1 // Type of the right-hand side matrix operand
2135  , typename ST2 > // Type of the scalar value
2136  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2137  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2138  {
2139  const size_t M( A.rows() );
2140  const size_t N( A.columns() );
2141 
2142  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
2143  const size_t jend( N & size_t(-2) );
2144 
2145  for( size_t j=0UL; j<N; ++j ) {
2146  y[j] = x[0UL] * A(0UL,j);
2147  }
2148  for( size_t i=1UL; i<M; ++i ) {
2149  for( size_t j=0UL; j<jend; j+=2UL ) {
2150  y[j ] += x[i] * A(i,j );
2151  y[j+1UL] += x[i] * A(i,j+1UL);
2152  }
2153  if( jend < N ) {
2154  y[jend] += x[i] * A(i,jend);
2155  }
2156  }
2157  for( size_t j=0UL; j<N; ++j ) {
2158  y[j] *= scalar;
2159  }
2160  }
2161  //**********************************************************************************************
2162 
2163  //**Vectorized default assignment to dense vectors**********************************************
2177  template< typename VT1 // Type of the left-hand side target vector
2178  , typename VT2 // Type of the left-hand side vector operand
2179  , typename MT1 // Type of the right-hand side matrix operand
2180  , typename ST2 > // Type of the scalar value
2181  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2182  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2183  {
2184  typedef IntrinsicTrait<ElementType> IT;
2185 
2186  const size_t M( A.rows() );
2187  const size_t N( A.columns() );
2188 
2189  const IntrinsicType factor( set( scalar ) );
2190 
2191  size_t j( 0UL );
2192 
2193  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2194  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2195  for( size_t i=0UL; i<M; ++i ) {
2196  const IntrinsicType x1( set( x[i] ) );
2197  xmm1 = xmm1 + x1 * A.load(i,j );
2198  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2199  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2200  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2201  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2202  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2203  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2204  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2205  }
2206  y.store( j , xmm1*factor );
2207  y.store( j+IT::size , xmm2*factor );
2208  y.store( j+IT::size*2UL, xmm3*factor );
2209  y.store( j+IT::size*3UL, xmm4*factor );
2210  y.store( j+IT::size*4UL, xmm5*factor );
2211  y.store( j+IT::size*5UL, xmm6*factor );
2212  y.store( j+IT::size*6UL, xmm7*factor );
2213  y.store( j+IT::size*7UL, xmm8*factor );
2214  }
2215  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2216  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2217  for( size_t i=0UL; i<M; ++i ) {
2218  const IntrinsicType x1( set( x[i] ) );
2219  xmm1 = xmm1 + x1 * A.load(i,j );
2220  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2221  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2222  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2223  }
2224  y.store( j , xmm1*factor );
2225  y.store( j+IT::size , xmm2*factor );
2226  y.store( j+IT::size*2UL, xmm3*factor );
2227  y.store( j+IT::size*3UL, xmm4*factor );
2228  }
2229  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2230  IntrinsicType xmm1, xmm2, xmm3;
2231  for( size_t i=0UL; i<M; ++i ) {
2232  const IntrinsicType x1( set( x[i] ) );
2233  xmm1 = xmm1 + x1 * A.load(i,j );
2234  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2235  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2236  }
2237  y.store( j , xmm1*factor );
2238  y.store( j+IT::size , xmm2*factor );
2239  y.store( j+IT::size*2UL, xmm3*factor );
2240  }
2241  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2242  IntrinsicType xmm1, xmm2;
2243  for( size_t i=0UL; i<M; ++i ) {
2244  const IntrinsicType x1( set( x[i] ) );
2245  xmm1 = xmm1 + x1 * A.load(i,j );
2246  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2247  }
2248  y.store( j , xmm1*factor );
2249  y.store( j+IT::size, xmm2*factor );
2250  }
2251  if( j < N ) {
2252  IntrinsicType xmm1;
2253  for( size_t i=0UL; i<M; ++i ) {
2254  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2255  }
2256  y.store( j, xmm1*factor );
2257  }
2258  }
2259  //**********************************************************************************************
2260 
2261  //**BLAS-based assignment to dense vectors (default)********************************************
2274  template< typename VT1 // Type of the left-hand side target vector
2275  , typename VT2 // Type of the left-hand side vector operand
2276  , typename MT1 // Type of the right-hand side matrix operand
2277  , typename ST2 > // Type of the scalar value
2278  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2279  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2280  {
2281  selectDefaultAssignKernel( y, x, A, scalar );
2282  }
2283  //**********************************************************************************************
2284 
2285  //**BLAS-based assignment to dense vectors (single precision)***********************************
2286 #if BLAZE_BLAS_MODE
2287 
2300  template< typename VT1 // Type of the left-hand side target vector
2301  , typename VT2 // Type of the left-hand side vector operand
2302  , typename MT1 // Type of the right-hand side matrix operand
2303  , typename ST2 > // Type of the scalar value
2304  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2305  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2306  {
2307  using boost::numeric_cast;
2308 
2312 
2313  const int M ( numeric_cast<int>( A.rows() ) );
2314  const int N ( numeric_cast<int>( A.columns() ) );
2315  const int lda( numeric_cast<int>( A.spacing() ) );
2316 
2317  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2318  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2319  }
2320 #endif
2321  //**********************************************************************************************
2322 
2323  //**BLAS-based assignment to dense vectors (double precision)***********************************
2324 #if BLAZE_BLAS_MODE
2325 
2338  template< typename VT1 // Type of the left-hand side target vector
2339  , typename VT2 // Type of the left-hand side vector operand
2340  , typename MT1 // Type of the right-hand side matrix operand
2341  , typename ST2 > // Type of the scalar value
2342  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2343  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2344  {
2345  using boost::numeric_cast;
2346 
2350 
2351  const int M ( numeric_cast<int>( A.rows() ) );
2352  const int N ( numeric_cast<int>( A.columns() ) );
2353  const int lda( numeric_cast<int>( A.spacing() ) );
2354 
2355  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2356  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2357  }
2358 #endif
2359  //**********************************************************************************************
2360 
2361  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2362 #if BLAZE_BLAS_MODE
2363 
2376  template< typename VT1 // Type of the left-hand side target vector
2377  , typename VT2 // Type of the left-hand side vector operand
2378  , typename MT1 // Type of the right-hand side matrix operand
2379  , typename ST2 > // Type of the scalar value
2380  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2381  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2382  {
2383  using boost::numeric_cast;
2384 
2388  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2389  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2390  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2391 
2392  const int M ( numeric_cast<int>( A.rows() ) );
2393  const int N ( numeric_cast<int>( A.columns() ) );
2394  const int lda( numeric_cast<int>( A.spacing() ) );
2395  const complex<float> alpha( scalar );
2396  const complex<float> beta ( 0.0F, 0.0F );
2397 
2398  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2399  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2400  }
2401 #endif
2402  //**********************************************************************************************
2403 
2404  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2405 #if BLAZE_BLAS_MODE
2406 
2419  template< typename VT1 // Type of the left-hand side target vector
2420  , typename VT2 // Type of the left-hand side vector operand
2421  , typename MT1 // Type of the right-hand side matrix operand
2422  , typename ST2 > // Type of the scalar value
2423  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2424  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2425  {
2426  using boost::numeric_cast;
2427 
2431  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2432  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2433  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2434 
2435  const int M ( numeric_cast<int>( A.rows() ) );
2436  const int N ( numeric_cast<int>( A.columns() ) );
2437  const int lda( numeric_cast<int>( A.spacing() ) );
2438  const complex<double> alpha( scalar );
2439  const complex<double> beta ( 0.0, 0.0 );
2440 
2441  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2442  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2443  }
2444 #endif
2445  //**********************************************************************************************
2446 
2447  //**Assignment to sparse vectors****************************************************************
2459  template< typename VT1 > // Type of the target sparse vector
2460  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2461  {
2463 
2467 
2468  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2469 
2470  const ResultType tmp( rhs );
2471  smpAssign( ~lhs, tmp );
2472  }
2473  //**********************************************************************************************
2474 
2475  //**Addition assignment to dense vectors********************************************************
2487  template< typename VT1 > // Type of the target dense vector
2488  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2489  {
2491 
2492  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2493 
2494  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2495  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2496 
2497  if( right.rows() == 0UL || right.columns() == 0UL ) {
2498  return;
2499  }
2500 
2501  LT x( left ); // Evaluation of the left-hand side dense vector operand
2502  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2503 
2504  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2505  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2506  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2507  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2508 
2509  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2510  }
2511  //**********************************************************************************************
2512 
2513  //**Addition assignment to dense vectors (kernel selection)*************************************
2524  template< typename VT1 // Type of the left-hand side target vector
2525  , typename VT2 // Type of the left-hand side vector operand
2526  , typename MT1 // Type of the right-hand side matrix operand
2527  , typename ST2 > // Type of the scalar value
2528  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2529  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2530  {
2531  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2532  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2533  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2534  else
2535  DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
2536  }
2537  //**********************************************************************************************
2538 
2539  //**Addition assignment to dense vectors (kernel selection)*************************************
2550  template< typename VT1 // Type of the left-hand side target vector
2551  , typename VT2 // Type of the left-hand side vector operand
2552  , typename MT1 // Type of the right-hand side matrix operand
2553  , typename ST2 > // Type of the scalar value
2554  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2555  selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2556  {
2557  smpAddAssign( y, x * A * scalar );
2558  }
2559  //**********************************************************************************************
2560 
2561  //**Default addition assignment to dense vectors************************************************
2575  template< typename VT1 // Type of the left-hand side target vector
2576  , typename VT2 // Type of the left-hand side vector operand
2577  , typename MT1 // Type of the right-hand side matrix operand
2578  , typename ST2 > // Type of the scalar value
2579  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2580  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2581  {
2582  y.addAssign( x * A * scalar );
2583  }
2584  //**********************************************************************************************
2585 
2586  //**Vectorized default addition assignment to dense vectors*************************************
2600  template< typename VT1 // Type of the left-hand side target vector
2601  , typename VT2 // Type of the left-hand side vector operand
2602  , typename MT1 // Type of the right-hand side matrix operand
2603  , typename ST2 > // Type of the scalar value
2604  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2605  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2606  {
2607  typedef IntrinsicTrait<ElementType> IT;
2608 
2609  const size_t M( A.rows() );
2610  const size_t N( A.columns() );
2611 
2612  const IntrinsicType factor( set( scalar ) );
2613 
2614  size_t j( 0UL );
2615 
2616  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2617  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2618  for( size_t i=0UL; i<M; ++i ) {
2619  const IntrinsicType x1( set( x[i] ) );
2620  xmm1 = xmm1 + x1 * A.load(i,j );
2621  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2622  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2623  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2624  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2625  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2626  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2627  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2628  }
2629  y.store( j , y.load(j ) + xmm1*factor );
2630  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2631  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2632  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2633  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
2634  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
2635  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
2636  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
2637  }
2638  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2639  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2640  for( size_t i=0UL; i<M; ++i ) {
2641  const IntrinsicType x1( set( x[i] ) );
2642  xmm1 = xmm1 + x1 * A.load(i,j );
2643  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2644  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2645  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2646  }
2647  y.store( j , y.load(j ) + xmm1*factor );
2648  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2649  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2650  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2651  }
2652  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2653  IntrinsicType xmm1, xmm2, xmm3;
2654  for( size_t i=0UL; i<M; ++i ) {
2655  const IntrinsicType x1( set( x[i] ) );
2656  xmm1 = xmm1 + x1 * A.load(i,j );
2657  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2658  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2659  }
2660  y.store( j , y.load(j ) + xmm1*factor );
2661  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2662  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2663  }
2664  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2665  IntrinsicType xmm1, xmm2;
2666  for( size_t i=0UL; i<M; ++i ) {
2667  const IntrinsicType x1( set( x[i] ) );
2668  xmm1 = xmm1 + x1 * A.load(i,j );
2669  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2670  }
2671  y.store( j , y.load(j ) + xmm1*factor );
2672  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
2673  }
2674  if( j < N ) {
2675  IntrinsicType xmm1;
2676  for( size_t i=0UL; i<M; ++i ) {
2677  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2678  }
2679  y.store( j, y.load(j) + xmm1*factor );
2680  }
2681  }
2682  //**********************************************************************************************
2683 
2684  //**BLAS-based addition assignment to dense vectors (default)***********************************
2698  template< typename VT1 // Type of the left-hand side target vector
2699  , typename VT2 // Type of the left-hand side vector operand
2700  , typename MT1 // Type of the right-hand side matrix operand
2701  , typename ST2 > // Type of the scalar value
2702  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2703  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2704  {
2705  selectDefaultAddAssignKernel( y, x, A, scalar );
2706  }
2707  //**********************************************************************************************
2708 
2709  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2710 #if BLAZE_BLAS_MODE
2711 
2724  template< typename VT1 // Type of the left-hand side target vector
2725  , typename VT2 // Type of the left-hand side vector operand
2726  , typename MT1 // Type of the right-hand side matrix operand
2727  , typename ST2 > // Type of the scalar value
2728  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2729  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2730  {
2731  using boost::numeric_cast;
2732 
2736 
2737  const int M ( numeric_cast<int>( A.rows() ) );
2738  const int N ( numeric_cast<int>( A.columns() ) );
2739  const int lda( numeric_cast<int>( A.spacing() ) );
2740 
2741  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2742  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2743  }
2744 #endif
2745  //**********************************************************************************************
2746 
2747  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2748 #if BLAZE_BLAS_MODE
2749 
2762  template< typename VT1 // Type of the left-hand side target vector
2763  , typename VT2 // Type of the left-hand side vector operand
2764  , typename MT1 // Type of the right-hand side matrix operand
2765  , typename ST2 > // Type of the scalar value
2766  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2767  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2768  {
2769  using boost::numeric_cast;
2770 
2774 
2775  const int M ( numeric_cast<int>( A.rows() ) );
2776  const int N ( numeric_cast<int>( A.columns() ) );
2777  const int lda( numeric_cast<int>( A.spacing() ) );
2778 
2779  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2780  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2781  }
2782 #endif
2783  //**********************************************************************************************
2784 
2785  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2786 #if BLAZE_BLAS_MODE
2787 
2800  template< typename VT1 // Type of the left-hand side target vector
2801  , typename VT2 // Type of the left-hand side vector operand
2802  , typename MT1 // Type of the right-hand side matrix operand
2803  , typename ST2 > // Type of the scalar value
2804  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2805  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2806  {
2807  using boost::numeric_cast;
2808 
2812  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2813  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2814  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2815 
2816  const int M ( numeric_cast<int>( A.rows() ) );
2817  const int N ( numeric_cast<int>( A.columns() ) );
2818  const int lda( numeric_cast<int>( A.spacing() ) );
2819  const complex<float> alpha( scalar );
2820  const complex<float> beta ( 1.0F, 0.0F );
2821 
2822  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2823  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2824  }
2825 #endif
2826  //**********************************************************************************************
2827 
2828  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2829 #if BLAZE_BLAS_MODE
2830 
2843  template< typename VT1 // Type of the left-hand side target vector
2844  , typename VT2 // Type of the left-hand side vector operand
2845  , typename MT1 // Type of the right-hand side matrix operand
2846  , typename ST2 > // Type of the scalar value
2847  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2848  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2849  {
2850  using boost::numeric_cast;
2851 
2855  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2856  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2857  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2858 
2859  const int M ( numeric_cast<int>( A.rows() ) );
2860  const int N ( numeric_cast<int>( A.columns() ) );
2861  const int lda( numeric_cast<int>( A.spacing() ) );
2862  const complex<double> alpha( scalar );
2863  const complex<double> beta ( 1.0, 0.0 );
2864 
2865  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2866  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2867  }
2868 #endif
2869  //**********************************************************************************************
2870 
2871  //**Addition assignment to sparse vectors*******************************************************
2872  // No special implementation for the addition assignment to sparse vectors.
2873  //**********************************************************************************************
2874 
2875  //**Subtraction assignment to dense vectors*****************************************************
2887  template< typename VT1 > // Type of the target dense vector
2888  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2889  {
2891 
2892  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2893 
2894  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2895  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2896 
2897  if( right.rows() == 0UL || right.columns() == 0UL ) {
2898  return;
2899  }
2900 
2901  LT x( left ); // Evaluation of the left-hand side dense vector operand
2902  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2903 
2904  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2905  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2906  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2907  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2908 
2909  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2910  }
2911  //**********************************************************************************************
2912 
2913  //**Subtraction assignment to dense vectors (kernel selection)**********************************
2924  template< typename VT1 // Type of the left-hand side target vector
2925  , typename VT2 // Type of the left-hand side vector operand
2926  , typename MT1 // Type of the right-hand side matrix operand
2927  , typename ST2 > // Type of the scalar value
2928  static inline typename DisableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2929  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2930  {
2931  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2932  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2933  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
2934  else
2935  DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
2936  }
2937  //**********************************************************************************************
2938 
2939  //**Subtraction assignment to dense vectors (kernel selection)**********************************
2950  template< typename VT1 // Type of the left-hand side target vector
2951  , typename VT2 // Type of the left-hand side vector operand
2952  , typename MT1 // Type of the right-hand side matrix operand
2953  , typename ST2 > // Type of the scalar value
2954  static inline typename EnableIf< UseSMPAssignKernel<VT1,VT2,MT1,ST2> >::Type
2955  selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2956  {
2957  smpSubAssign( y, x * A * scalar );
2958  }
2959  //**********************************************************************************************
2960 
2961  //**Default subtraction assignment to dense vectors*********************************************
2975  template< typename VT1 // Type of the left-hand side target vector
2976  , typename VT2 // Type of the left-hand side vector operand
2977  , typename MT1 // Type of the right-hand side matrix operand
2978  , typename ST2 > // Type of the scalar value
2979  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2980  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2981  {
2982  y.subAssign( x * A * scalar );
2983  }
2984  //**********************************************************************************************
2985 
2986  //**Vectorized default subtraction assignment to dense vectors**********************************
3000  template< typename VT1 // Type of the left-hand side target vector
3001  , typename VT2 // Type of the left-hand side vector operand
3002  , typename MT1 // Type of the right-hand side matrix operand
3003  , typename ST2 > // Type of the scalar value
3004  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3005  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3006  {
3007  typedef IntrinsicTrait<ElementType> IT;
3008 
3009  const size_t M( A.rows() );
3010  const size_t N( A.columns() );
3011 
3012  const IntrinsicType factor( set( scalar ) );
3013 
3014  size_t j( 0UL );
3015 
3016  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3017  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3018  for( size_t i=0UL; i<M; ++i ) {
3019  const IntrinsicType x1( set( x[i] ) );
3020  xmm1 = xmm1 + x1 * A.load(i,j );
3021  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3022  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3023  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3024  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3025  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3026  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3027  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3028  }
3029  y.store( j , y.load(j ) - xmm1*factor );
3030  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3031  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3032  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3033  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
3034  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
3035  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
3036  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
3037  }
3038  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3039  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3040  for( size_t i=0UL; i<M; ++i ) {
3041  const IntrinsicType x1( set( x[i] ) );
3042  xmm1 = xmm1 + x1 * A.load(i,j );
3043  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3044  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3045  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3046  }
3047  y.store( j , y.load(j ) - xmm1*factor );
3048  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3049  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3050  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3051  }
3052  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
3053  IntrinsicType xmm1, xmm2, xmm3;
3054  for( size_t i=0UL; i<M; ++i ) {
3055  const IntrinsicType x1( set( x[i] ) );
3056  xmm1 = xmm1 + x1 * A.load(i,j );
3057  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3058  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3059  }
3060  y.store( j , y.load(j ) - xmm1*factor );
3061  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3062  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3063  }
3064  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3065  IntrinsicType xmm1, xmm2;
3066  for( size_t i=0UL; i<M; ++i ) {
3067  const IntrinsicType x1( set( x[i] ) );
3068  xmm1 = xmm1 + x1 * A.load(i,j );
3069  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3070  }
3071  y.store( j , y.load(j ) - xmm1*factor );
3072  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
3073  }
3074  if( j < N ) {
3075  IntrinsicType xmm1;
3076  for( size_t i=0UL; i<M; ++i ) {
3077  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3078  }
3079  y.store( j, y.load(j) - xmm1*factor );
3080  }
3081  }
3082  //**********************************************************************************************
3083 
3084  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Fallback overload of the BLAS kernel selection for the subtraction assignment.
      // The EnableIf return type makes this overload available only when
      // UseDefaultKernel<VT1,VT2,MT1,ST2> holds. It performs no computation itself and
      // forwards all four arguments unchanged to selectDefaultSubAssignKernel().
      //
      //   y      - target vector (modified through the non-const reference)
      //   x      - left-hand side vector operand
      //   A      - right-hand side matrix operand
      //   scalar - scalar value
3098  template< typename VT1 // Type of the left-hand side target vector
3099  , typename VT2 // Type of the left-hand side vector operand
3100  , typename MT1 // Type of the right-hand side matrix operand
3101  , typename ST2 > // Type of the scalar value
3102  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3103  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3104  {
3105  selectDefaultSubAssignKernel( y, x, A, scalar );
3106  }
3107  //**********************************************************************************************
3108 
3109  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
3110 #if BLAZE_BLAS_MODE
3111 
      // BLAS overload of the subtraction assignment kernel, selected through EnableIf when
      // UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds. Only compiled if BLAZE_BLAS_MODE is
      // non-zero.
      //
      // The work is delegated to a single cblas_sgemv call with alpha = -scalar and
      // beta = 1.0F on the transposed row-major matrix, i.e. y := -scalar * A^T * x + y.
      //
      //   y      - target vector, updated in place via y.data()
      //   x      - left-hand side vector operand, read via x.data()
      //   A      - right-hand side matrix operand, read via A.data()
      //   scalar - scalar value; its negation is passed as alpha
3124  template< typename VT1 // Type of the left-hand side target vector
3125  , typename VT2 // Type of the left-hand side vector operand
3126  , typename MT1 // Type of the right-hand side matrix operand
3127  , typename ST2 > // Type of the scalar value
3128  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3129  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3130  {
3131  using boost::numeric_cast;
3132 
3136 
      // The CBLAS interface takes int dimensions, so the sizes are converted with
      // boost::numeric_cast rather than a plain cast.
      // NOTE(review): A.spacing() is used as the leading dimension; presumably it is the
      // (possibly padded) distance between consecutive rows - confirm against the matrix type.
3137  const int M ( numeric_cast<int>( A.rows() ) );
3138  const int N ( numeric_cast<int>( A.columns() ) );
3139  const int lda( numeric_cast<int>( A.spacing() ) );
3140 
      // Both vectors are passed with unit stride; beta = 1.0F keeps the current contents of y,
      // so the negated alpha turns the gemv update into a subtraction.
3141  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3142  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3143  }
3144 #endif
3145  //**********************************************************************************************
3146 
3147  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
3148 #if BLAZE_BLAS_MODE
3149 
      // BLAS overload of the subtraction assignment kernel, selected through EnableIf when
      // UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds. Only compiled if BLAZE_BLAS_MODE is
      // non-zero.
      //
      // The work is delegated to a single cblas_dgemv call with alpha = -scalar and
      // beta = 1.0 on the transposed row-major matrix, i.e. y := -scalar * A^T * x + y.
      //
      //   y      - target vector, updated in place via y.data()
      //   x      - left-hand side vector operand, read via x.data()
      //   A      - right-hand side matrix operand, read via A.data()
      //   scalar - scalar value; its negation is passed as alpha
3162  template< typename VT1 // Type of the left-hand side target vector
3163  , typename VT2 // Type of the left-hand side vector operand
3164  , typename MT1 // Type of the right-hand side matrix operand
3165  , typename ST2 > // Type of the scalar value
3166  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3167  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3168  {
3169  using boost::numeric_cast;
3170 
3174 
      // The CBLAS interface takes int dimensions, so the sizes are converted with
      // boost::numeric_cast rather than a plain cast.
      // NOTE(review): A.spacing() is used as the leading dimension; presumably it is the
      // (possibly padded) distance between consecutive rows - confirm against the matrix type.
3175  const int M ( numeric_cast<int>( A.rows() ) );
3176  const int N ( numeric_cast<int>( A.columns() ) );
3177  const int lda( numeric_cast<int>( A.spacing() ) );
3178 
      // Both vectors are passed with unit stride; beta = 1.0 keeps the current contents of y,
      // so the negated alpha turns the gemv update into a subtraction.
3179  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3180  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3181  }
3182 #endif
3183  //**********************************************************************************************
3184 
3185  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
3186 #if BLAZE_BLAS_MODE
3187 
      // BLAS overload of the subtraction assignment kernel for complex<float> data, selected
      // through EnableIf when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds. The scalar
      // type ST2 is not part of that condition; the scalar is converted to complex<float>
      // below. Only compiled if BLAZE_BLAS_MODE is non-zero.
      //
      // The work is delegated to a single cblas_cgemv call with alpha = -scalar and
      // beta = (1,0) on the transposed (CblasTrans, i.e. not conjugated) row-major matrix:
      // y := -scalar * A^T * x + y.
      //
      //   y      - target vector, updated in place via y.data()
      //   x      - left-hand side vector operand, read via x.data()
      //   A      - right-hand side matrix operand, read via A.data()
      //   scalar - scalar value; its negation is passed as alpha
3200  template< typename VT1 // Type of the left-hand side target vector
3201  , typename VT2 // Type of the left-hand side vector operand
3202  , typename MT1 // Type of the right-hand side matrix operand
3203  , typename ST2 > // Type of the scalar value
3204  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3205  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3206  {
3207  using boost::numeric_cast;
3208 
      // Compile-time checks on the value_type nested in each element type
      // (expected to be float for this kernel).
3212  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3213  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3214  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3215 
      // The CBLAS interface takes int dimensions, so the sizes are converted with
      // boost::numeric_cast rather than a plain cast.
3216  const int M ( numeric_cast<int>( A.rows() ) );
3217  const int N ( numeric_cast<int>( A.columns() ) );
3218  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines receive alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the current contents of y; the negated alpha yields a subtraction.
3219  const complex<float> alpha( -scalar );
3220  const complex<float> beta ( 1.0F, 0.0F );
3221 
3222  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3223  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3224  }
3225 #endif
3226  //**********************************************************************************************
3227 
3228  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
3229 #if BLAZE_BLAS_MODE
3230 
      // BLAS overload of the subtraction assignment kernel for complex<double> data, selected
      // through EnableIf when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds. The scalar
      // type ST2 is not part of that condition; the scalar is converted to complex<double>
      // below. Only compiled if BLAZE_BLAS_MODE is non-zero.
      //
      // The work is delegated to a single cblas_zgemv call with alpha = -scalar and
      // beta = (1,0) on the transposed (CblasTrans, i.e. not conjugated) row-major matrix:
      // y := -scalar * A^T * x + y.
      //
      //   y      - target vector, updated in place via y.data()
      //   x      - left-hand side vector operand, read via x.data()
      //   A      - right-hand side matrix operand, read via A.data()
      //   scalar - scalar value; its negation is passed as alpha
3243  template< typename VT1 // Type of the left-hand side target vector
3244  , typename VT2 // Type of the left-hand side vector operand
3245  , typename MT1 // Type of the right-hand side matrix operand
3246  , typename ST2 > // Type of the scalar value
3247  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3248  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3249  {
3250  using boost::numeric_cast;
3251 
      // Compile-time checks on the value_type nested in each element type
      // (expected to be double for this kernel).
3255  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3256  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3257  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3258 
      // The CBLAS interface takes int dimensions, so the sizes are converted with
      // boost::numeric_cast rather than a plain cast.
3259  const int M ( numeric_cast<int>( A.rows() ) );
3260  const int N ( numeric_cast<int>( A.columns() ) );
3261  const int lda( numeric_cast<int>( A.spacing() ) );
      // The complex gemv routines receive alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the current contents of y; the negated alpha yields a subtraction.
3262  const complex<double> alpha( -scalar );
3263  const complex<double> beta ( 1.0, 0.0 );
3264 
3265  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3266  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3267  }
3268 #endif
3269  //**********************************************************************************************
3270 
3271  //**Subtraction assignment to sparse vectors****************************************************
3272  // No special implementation for the subtraction assignment to sparse vectors.
3273  //**********************************************************************************************
3274 
3275  //**Multiplication assignment to dense vectors**************************************************
      // Multiplication assignment of this scaled expression to a dense row vector.
      // There is no dedicated kernel for this operation: the expression rhs is first
      // evaluated into a temporary of type ResultType, and that temporary is then handed
      // to the multAssign() overload for the evaluated type.
      //
      //   lhs - target dense vector
      //   rhs - the expression to be multiplied into lhs
3287  template< typename VT1 > // Type of the target dense vector
3288  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3289  {
3291 
3295 
      // Internal sanity check only: both sizes are expected to agree at this point.
3296  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3297 
      // Evaluate the complete expression once, then delegate.
3298  const ResultType tmp( rhs );
3299  multAssign( ~lhs, tmp );
3300  }
3301  //**********************************************************************************************
3302 
3303  //**Multiplication assignment to sparse vectors*************************************************
3304  // No special implementation for the multiplication assignment to sparse vectors.
3305  //**********************************************************************************************
3306 
3307  //**Compile time checks*************************************************************************
3316  //**********************************************************************************************
3317 };
3319 //*************************************************************************************************
3320 
3321 
3322 
3323 
3324 //=================================================================================================
3325 //
3326 // GLOBAL BINARY ARITHMETIC OPERATORS
3327 //
3328 //=================================================================================================
3329 
3330 //*************************************************************************************************
     // Multiplication operator creating a TDVecDMatMultExpr<T1,T2> expression object from a
     // dense vector (vec) and a dense matrix (mat).
     //
     // The DisableIf return type removes this overload when T2 is itself a matrix/matrix
     // multiplication expression (IsMatMatMultExpr<T2>).
     //
     // Throws std::invalid_argument if the size of the vector differs from the number of
     // rows of the matrix.
     //
     // NOTE(review): the function declarator (listing line 3364) is missing from this
     // rendering; the parameter names vec and mat are taken from the function body.
3361 template< typename T1 // Type of the left-hand side dense vector
3362  , typename T2 > // Type of the right-hand side dense matrix
3363 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
3365 {
3367 
     // Reject operands whose dimensions do not match before building the expression.
3368  if( (~vec).size() != (~mat).rows() )
3369  throw std::invalid_argument( "Vector and matrix sizes do not match" );
3370 
3371  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
3372 }
3373 //*************************************************************************************************
3374 
3375 
3376 
3377 
3378 //=================================================================================================
3379 //
3380 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3381 //
3382 //=================================================================================================
3383 
3384 //*************************************************************************************************
     // Restructuring multiplication operator for the case that the matrix operand is itself
     // a matrix/matrix multiplication expression (enabled via IsMatMatMultExpr<T2>).
     //
     // The product vec * ( A * B ) is re-associated to ( vec * A ) * B, where A and B are the
     // left and right operands of the matrix expression. The return type is obtained from
     // MultExprTrait<T1,T2>.
     //
     // NOTE(review): presumably this avoids evaluating the matrix/matrix product - confirm.
     // NOTE(review): the function declarator (listing line 3401) is missing from this
     // rendering; the parameter names vec and mat are taken from the function body.
3397 template< typename T1 // Type of the left-hand side dense vector
3398  , typename T2 // Type of the right-hand side dense matrix
3399  , bool SO > // Storage order of the right-hand side dense matrix
3400 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3402 {
3404 
3405  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
3406 }
3407 //*************************************************************************************************
3408 
3409 
3410 
3411 
3412 //=================================================================================================
3413 //
3414 // EXPRESSION TRAIT SPECIALIZATIONS
3415 //
3416 //=================================================================================================
3417 
3418 //*************************************************************************************************
     // Specialization of SubvectorExprTrait for TDVecDMatMultExpr<VT,MT>.
     // The nested Type is the MultExprTrait result of the vector type VT and the submatrix
     // expression type of const MT, i.e. a subvector of the product is typed as the product
     // of the vector with a submatrix of the matrix operand. The flag AF is passed on
     // unchanged to SubmatrixExprTrait.
3420 template< typename VT, typename MT, bool AF >
3421 struct SubvectorExprTrait< TDVecDMatMultExpr<VT,MT>, AF >
3422 {
3423  public:
3424  //**********************************************************************************************
3425  typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
3426  //**********************************************************************************************
3427 };
3429 //*************************************************************************************************
3430 
3431 } // namespace blaze
3432 
3433 #endif
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:376
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:112
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4075
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:247
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:116
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:386
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:342
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:295
Header file for the VecScalarMultExpr base class.
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:257
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:242
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:251
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:251
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:244
Header file for the multiplication trait.
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold.This threshold represents the system...
Definition: Thresholds.h:165
Header file for the dense vector SMP implementation.
Header file for the IsDouble type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:246
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:354
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:280
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type.In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Constraint on the data type.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:366
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:322
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:111
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:113
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:254
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:332
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:114
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Header file for the TVecMatMultExpr base class.
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:397
Expression object for transpose dense vector-dense matrix multiplications.The TDVecDMatMultExpr class...
Definition: Forward.h:129
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:245
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:260
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
Header file for all intrinsic functionality.
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecDMatMultExpr.h:398
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Header file for the sparse vector SMP implementation.
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:115
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:248
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:396
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:138
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:243
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.