All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
65 #include <blaze/system/BLAS.h>
67 #include <blaze/util/Assert.h>
68 #include <blaze/util/Complex.h>
74 #include <blaze/util/DisableIf.h>
75 #include <blaze/util/EnableIf.h>
77 #include <blaze/util/SelectType.h>
78 #include <blaze/util/Types.h>
84 
85 
86 namespace blaze {
87 
88 //=================================================================================================
89 //
90 // CLASS TDVECTDMATMULTEXPR
91 //
92 //=================================================================================================
93 
94 //*************************************************************************************************
101 template< typename VT // Type of the left-hand side dense vector
102  , typename MT > // Type of the right-hand side dense matrix
103 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
104  , private TVecMatMultExpr
105  , private Computation
106 {
107  private:
108  //**Type definitions****************************************************************************
109  typedef typename VT::ResultType VRT;
110  typedef typename MT::ResultType MRT;
111  typedef typename VRT::ElementType VET;
112  typedef typename MRT::ElementType MET;
113  typedef typename VT::CompositeType VCT;
114  typedef typename MT::CompositeType MCT;
115  //**********************************************************************************************
116 
117  //**********************************************************************************************
119  enum { evaluateVector = IsComputation<VT>::value };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
126  //**********************************************************************************************
127 
128  //**********************************************************************************************
130 
134  template< typename T1, typename T2, typename T3 >
135  struct UseSinglePrecisionKernel {
136  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
137  IsFloat<typename T1::ElementType>::value &&
138  IsFloat<typename T2::ElementType>::value &&
139  IsFloat<typename T3::ElementType>::value };
140  };
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146 
150  template< typename T1, typename T2, typename T3 >
151  struct UseDoublePrecisionKernel {
152  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
153  IsDouble<typename T1::ElementType>::value &&
154  IsDouble<typename T2::ElementType>::value &&
155  IsDouble<typename T3::ElementType>::value };
156  };
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
166  template< typename T1, typename T2, typename T3 >
167  struct UseSinglePrecisionComplexKernel {
168  typedef complex<float> Type;
169  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  IsSame<typename T1::ElementType,Type>::value &&
171  IsSame<typename T2::ElementType,Type>::value &&
172  IsSame<typename T3::ElementType,Type>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
183  template< typename T1, typename T2, typename T3 >
184  struct UseDoublePrecisionComplexKernel {
185  typedef complex<double> Type;
186  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  IsSame<typename T1::ElementType,Type>::value &&
188  IsSame<typename T2::ElementType,Type>::value &&
189  IsSame<typename T3::ElementType,Type>::value };
190  };
192  //**********************************************************************************************
193 
194  //**********************************************************************************************
196 
199  template< typename T1, typename T2, typename T3 >
200  struct UseDefaultKernel {
201  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
202  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
203  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
204  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
215  template< typename T1, typename T2, typename T3 >
216  struct UseVectorizedDefaultKernel {
217  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
218  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
219  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
220  IntrinsicTrait<typename T1::ElementType>::addition &&
221  IntrinsicTrait<typename T1::ElementType>::multiplication };
222  };
224  //**********************************************************************************************
225 
226  public:
227  //**Type definitions****************************************************************************
233  typedef const ElementType ReturnType;
234  typedef const ResultType CompositeType;
235 
237  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
238 
240  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
241 
244 
247  //**********************************************************************************************
248 
249  //**Compilation flags***************************************************************************
251  enum { vectorizable = 0 };
252 
254  enum { smpAssignable = 0 };
255  //**********************************************************************************************
256 
257  //**Constructor*********************************************************************************
263  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
264  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
265  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
266  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
267  {
268  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
269  }
270  //**********************************************************************************************
271 
272  //**Subscript operator**************************************************************************
278  inline ReturnType operator[]( size_t index ) const {
279  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
280 
281  ElementType res;
282 
283  if( mat_.rows() != 0UL ) {
284  res = vec_[0UL] * mat_(0UL,index);
285  for( size_t j=1UL; j<end_; j+=2UL ) {
286  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
287  }
288  if( end_ < mat_.rows() ) {
289  res += vec_[end_] * mat_(end_,index);
290  }
291  }
292  else {
293  reset( res );
294  }
295 
296  return res;
297  }
298  //**********************************************************************************************
299 
300  //**Size function*******************************************************************************
305  inline size_t size() const {
306  return mat_.columns();
307  }
308  //**********************************************************************************************
309 
310  //**Left operand access*************************************************************************
315  inline LeftOperand leftOperand() const {
316  return vec_;
317  }
318  //**********************************************************************************************
319 
320  //**Right operand access************************************************************************
325  inline RightOperand rightOperand() const {
326  return mat_;
327  }
328  //**********************************************************************************************
329 
330  //**********************************************************************************************
336  template< typename T >
337  inline bool canAlias( const T* alias ) const {
338  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
339  }
340  //**********************************************************************************************
341 
342  //**********************************************************************************************
348  template< typename T >
349  inline bool isAliased( const T* alias ) const {
350  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
351  }
352  //**********************************************************************************************
353 
354  private:
355  //**Member variables****************************************************************************
358  const size_t end_;
359  //**********************************************************************************************
360 
361  //**Assignment to dense vectors*****************************************************************
374  template< typename VT1 > // Type of the target dense vector
375  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
376  {
378 
379  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
380 
381  if( rhs.mat_.rows() == 0UL ) {
382  reset( ~lhs );
383  return;
384  }
385  else if( rhs.mat_.columns() == 0UL ) {
386  return;
387  }
388 
389  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
390  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
391 
392  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
393  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
394  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
395  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
396 
397  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
398  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
399  TDVecTDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
400  else
401  TDVecTDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
402  }
404  //**********************************************************************************************
405 
406  //**Default assignment to dense vectors*********************************************************
420  template< typename VT1 // Type of the left-hand side target vector
421  , typename VT2 // Type of the left-hand side vector operand
422  , typename MT1 > // Type of the right-hand side matrix operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
424  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
425  {
426  y.assign( x * A );
427  }
429  //**********************************************************************************************
430 
431  //**Vectorized default assignment to dense vectors**********************************************
445  template< typename VT1 // Type of the left-hand side target vector
446  , typename VT2 // Type of the left-hand side vector operand
447  , typename MT1 > // Type of the right-hand side matrix operand
448  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
449  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
450  {
451  typedef IntrinsicTrait<ElementType> IT;
452 
453  const size_t M( A.rows() );
454  const size_t N( A.columns() );
455 
456  size_t j( 0UL );
457 
458  for( ; (j+8UL) <= N; j+=8UL ) {
459  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
460  for( size_t i=0UL; i<M; i+=IT::size ) {
461  const IntrinsicType x1( x.load(i) );
462  xmm1 = xmm1 + x1 * A.load(i,j );
463  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
464  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
465  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
466  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
467  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
468  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
469  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
470  }
471  y[j ] = sum( xmm1 );
472  y[j+1UL] = sum( xmm2 );
473  y[j+2UL] = sum( xmm3 );
474  y[j+3UL] = sum( xmm4 );
475  y[j+4UL] = sum( xmm5 );
476  y[j+5UL] = sum( xmm6 );
477  y[j+6UL] = sum( xmm7 );
478  y[j+7UL] = sum( xmm8 );
479  }
480  for( ; (j+4UL) <= N; j+=4UL ) {
481  IntrinsicType xmm1, xmm2, xmm3, xmm4;
482  for( size_t i=0UL; i<M; i+=IT::size ) {
483  const IntrinsicType x1( x.load(i) );
484  xmm1 = xmm1 + x1 * A.load(i,j );
485  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
486  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
487  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
488  }
489  y[j ] = sum( xmm1 );
490  y[j+1UL] = sum( xmm2 );
491  y[j+2UL] = sum( xmm3 );
492  y[j+3UL] = sum( xmm4 );
493  }
494  for( ; (j+3UL) <= N; j+=3UL ) {
495  IntrinsicType xmm1, xmm2, xmm3;
496  for( size_t i=0UL; i<M; i+=IT::size ) {
497  const IntrinsicType x1( x.load(i) );
498  xmm1 = xmm1 + x1 * A.load(i,j );
499  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
500  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
501  }
502  y[j ] = sum( xmm1 );
503  y[j+1UL] = sum( xmm2 );
504  y[j+2UL] = sum( xmm3 );
505  }
506  for( ; (j+2UL) <= N; j+=2UL ) {
507  IntrinsicType xmm1, xmm2;
508  for( size_t i=0UL; i<M; i+=IT::size ) {
509  const IntrinsicType x1( x.load(i) );
510  xmm1 = xmm1 + x1 * A.load(i,j );
511  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
512  }
513  y[j ] = sum( xmm1 );
514  y[j+1UL] = sum( xmm2 );
515  }
516  if( j < N ) {
517  IntrinsicType xmm1;
518  for( size_t i=0UL; i<M; i+=IT::size ) {
519  xmm1 = xmm1 + A.load(i,j) * x.load(i);
520  }
521  y[j] = sum( xmm1 );
522  }
523  }
525  //**********************************************************************************************
526 
527  //**BLAS-based assignment to dense vectors (default)********************************************
541  template< typename VT1 // Type of the left-hand side target vector
542  , typename VT2 // Type of the left-hand side vector operand
543  , typename MT1 > // Type of the right-hand side matrix operand
544  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
545  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
546  {
547  selectDefaultAssignKernel( y, x, A );
548  }
550  //**********************************************************************************************
551 
552  //**BLAS-based assignment to dense vectors (single precision)***********************************
553 #if BLAZE_BLAS_MODE
554 
567  template< typename VT1 // Type of the left-hand side target vector
568  , typename VT2 // Type of the left-hand side vector operand
569  , typename MT1 > // Type of the right-hand side matrix operand
570  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
571  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
572  {
573  using boost::numeric_cast;
574 
578 
579  const int M ( numeric_cast<int>( A.rows() ) );
580  const int N ( numeric_cast<int>( A.columns() ) );
581  const int lda( numeric_cast<int>( A.spacing() ) );
582 
583  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
584  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
585  }
587 #endif
588  //**********************************************************************************************
589 
590  //**BLAS-based assignment to dense vectors (double precision)***********************************
591 #if BLAZE_BLAS_MODE
592 
605  template< typename VT1 // Type of the left-hand side target vector
606  , typename VT2 // Type of the left-hand side vector operand
607  , typename MT1 > // Type of the right-hand side matrix operand
608  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
609  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
610  {
611  using boost::numeric_cast;
612 
616 
617  const int M ( numeric_cast<int>( A.rows() ) );
618  const int N ( numeric_cast<int>( A.columns() ) );
619  const int lda( numeric_cast<int>( A.spacing() ) );
620 
621  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
622  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
623  }
625 #endif
626  //**********************************************************************************************
627 
628  //**BLAS-based assignment to dense vectors (single precision complex)***************************
629 #if BLAZE_BLAS_MODE
630 
643  template< typename VT1 // Type of the left-hand side target vector
644  , typename VT2 // Type of the left-hand side vector operand
645  , typename MT1 > // Type of the right-hand side matrix operand
646  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
647  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
648  {
649  using boost::numeric_cast;
650 
654  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
655  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
656  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
657 
658  const int M ( numeric_cast<int>( A.rows() ) );
659  const int N ( numeric_cast<int>( A.columns() ) );
660  const int lda( numeric_cast<int>( A.spacing() ) );
661  const complex<float> alpha( 1.0F, 0.0F );
662  const complex<float> beta ( 0.0F, 0.0F );
663 
664  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
665  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
666  }
668 #endif
669  //**********************************************************************************************
670 
671  //**BLAS-based assignment to dense vectors (double precision complex)***************************
672 #if BLAZE_BLAS_MODE
673 
686  template< typename VT1 // Type of the left-hand side target vector
687  , typename VT2 // Type of the left-hand side vector operand
688  , typename MT1 > // Type of the right-hand side matrix operand
689  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
690  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
691  {
692  using boost::numeric_cast;
693 
697  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
700 
701  const int M ( numeric_cast<int>( A.rows() ) );
702  const int N ( numeric_cast<int>( A.columns() ) );
703  const int lda( numeric_cast<int>( A.spacing() ) );
704  const complex<double> alpha( 1.0, 0.0 );
705  const complex<double> beta ( 0.0, 0.0 );
706 
707  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
708  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
709  }
711 #endif
712  //**********************************************************************************************
713 
714  //**Assignment to sparse vectors****************************************************************
727  template< typename VT1 > // Type of the target sparse vector
728  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
729  {
731 
735 
736  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
737 
738  const ResultType tmp( rhs );
739  assign( ~lhs, tmp );
740  }
742  //**********************************************************************************************
743 
744  //**Addition assignment to dense vectors********************************************************
757  template< typename VT1 > // Type of the target dense vector
758  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
759  {
761 
762  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
763 
764  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
765  return;
766  }
767 
768  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
769  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
770 
771  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
772  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
773  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
774  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
775 
776  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
777  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
778  TDVecTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
779  else
780  TDVecTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
781  }
783  //**********************************************************************************************
784 
785  //**Default addition assignment to dense vectors************************************************
799  template< typename VT1 // Type of the left-hand side target vector
800  , typename VT2 // Type of the left-hand side vector operand
801  , typename MT1 > // Type of the right-hand side matrix operand
802  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
803  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
804  {
805  y.addAssign( x * A );
806  }
808  //**********************************************************************************************
809 
810  //**Vectorized default addition assignment to dense vectors*************************************
824  template< typename VT1 // Type of the left-hand side target vector
825  , typename VT2 // Type of the left-hand side vector operand
826  , typename MT1 > // Type of the right-hand side matrix operand
827  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
828  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
829  {
830  typedef IntrinsicTrait<ElementType> IT;
831 
832  const size_t M( A.rows() );
833  const size_t N( A.columns() );
834 
835  size_t j( 0UL );
836 
837  for( ; (j+8UL) <= N; j+=8UL ) {
838  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
839  for( size_t i=0UL; i<M; i+=IT::size ) {
840  const IntrinsicType x1( x.load(i) );
841  xmm1 = xmm1 + x1 * A.load(i,j );
842  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
843  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
844  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
845  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
846  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
847  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
848  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
849  }
850  y[j ] += sum( xmm1 );
851  y[j+1UL] += sum( xmm2 );
852  y[j+2UL] += sum( xmm3 );
853  y[j+3UL] += sum( xmm4 );
854  y[j+4UL] += sum( xmm5 );
855  y[j+5UL] += sum( xmm6 );
856  y[j+6UL] += sum( xmm7 );
857  y[j+7UL] += sum( xmm8 );
858  }
859  for( ; (j+4UL) <= N; j+=4UL ) {
860  IntrinsicType xmm1, xmm2, xmm3, xmm4;
861  for( size_t i=0UL; i<M; i+=IT::size ) {
862  const IntrinsicType x1( x.load(i) );
863  xmm1 = xmm1 + x1 * A.load(i,j );
864  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
865  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
866  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
867  }
868  y[j ] += sum( xmm1 );
869  y[j+1UL] += sum( xmm2 );
870  y[j+2UL] += sum( xmm3 );
871  y[j+3UL] += sum( xmm4 );
872  }
873  for( ; (j+3UL) <= N; j+=3UL ) {
874  IntrinsicType xmm1, xmm2, xmm3;
875  for( size_t i=0UL; i<M; i+=IT::size ) {
876  const IntrinsicType x1( x.load(i) );
877  xmm1 = xmm1 + x1 * A.load(i,j );
878  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
879  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
880  }
881  y[j ] += sum( xmm1 );
882  y[j+1UL] += sum( xmm2 );
883  y[j+2UL] += sum( xmm3 );
884  }
885  for( ; (j+2UL) <= N; j+=2UL ) {
886  IntrinsicType xmm1, xmm2;
887  for( size_t i=0UL; i<M; i+=IT::size ) {
888  const IntrinsicType x1( x.load(i) );
889  xmm1 = xmm1 + x1 * A.load(i,j );
890  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
891  }
892  y[j ] += sum( xmm1 );
893  y[j+1UL] += sum( xmm2 );
894  }
895  if( j < N ) {
896  IntrinsicType xmm1;
897  for( size_t i=0UL; i<M; i+=IT::size ) {
898  xmm1 = xmm1 + A.load(i,j) * x.load(i);
899  }
900  y[j] += sum( xmm1 );
901  }
902  }
904  //**********************************************************************************************
905 
906  //**BLAS-based addition assignment to dense vectors (default)***********************************
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename VT2 // Type of the left-hand side vector operand
922  , typename MT1 > // Type of the right-hand side matrix operand
923  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
924  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
925  {
926  selectDefaultAddAssignKernel( y, x, A );
927  }
929  //**********************************************************************************************
930 
931  //**BLAS-based addition assignment to dense vectors (single precision)**************************
932 #if BLAZE_BLAS_MODE
933 
946  template< typename VT1 // Type of the left-hand side target vector
947  , typename VT2 // Type of the left-hand side vector operand
948  , typename MT1 > // Type of the right-hand side matrix operand
949  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
950  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
951  {
952  using boost::numeric_cast;
953 
957 
958  const int M ( numeric_cast<int>( A.rows() ) );
959  const int N ( numeric_cast<int>( A.columns() ) );
960  const int lda( numeric_cast<int>( A.spacing() ) );
961 
962  cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
963  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
964  }
966 #endif
967  //**********************************************************************************************
968 
969  //**BLAS-based addition assignment to dense vectors (double precision)**************************
970 #if BLAZE_BLAS_MODE
971 
984  template< typename VT1 // Type of the left-hand side target vector
985  , typename VT2 // Type of the left-hand side vector operand
986  , typename MT1 > // Type of the right-hand side matrix operand
987  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
988  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
989  {
990  using boost::numeric_cast;
991 
995 
996  const int M ( numeric_cast<int>( A.rows() ) );
997  const int N ( numeric_cast<int>( A.columns() ) );
998  const int lda( numeric_cast<int>( A.spacing() ) );
999 
1000  cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
1001  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1002  }
1004 #endif
1005  //**********************************************************************************************
1006 
1007  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1008 #if BLAZE_BLAS_MODE
1009 
1022  template< typename VT1 // Type of the left-hand side target vector
1023  , typename VT2 // Type of the left-hand side vector operand
1024  , typename MT1 > // Type of the right-hand side matrix operand
1025  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1026  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1027  {
1028  using boost::numeric_cast;
1029 
1033  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1034  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1035  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1036 
1037  const int M ( numeric_cast<int>( A.rows() ) );
1038  const int N ( numeric_cast<int>( A.columns() ) );
1039  const int lda( numeric_cast<int>( A.spacing() ) );
1040  const complex<float> alpha( 1.0F, 0.0F );
1041  const complex<float> beta ( 1.0F, 0.0F );
1042 
1043  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1044  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1045  }
1047 #endif
1048  //**********************************************************************************************
1049 
1050  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1051 #if BLAZE_BLAS_MODE
1052 
1065  template< typename VT1 // Type of the left-hand side target vector
1066  , typename VT2 // Type of the left-hand side vector operand
1067  , typename MT1 > // Type of the right-hand side matrix operand
1068  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1069  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1070  {
1071  using boost::numeric_cast;
1072 
1076  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1077  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1078  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1079 
1080  const int M ( numeric_cast<int>( A.rows() ) );
1081  const int N ( numeric_cast<int>( A.columns() ) );
1082  const int lda( numeric_cast<int>( A.spacing() ) );
1083  const complex<double> alpha( 1.0, 0.0 );
1084  const complex<double> beta ( 1.0, 0.0 );
1085 
1086  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1087  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1088  }
1090 #endif
1091  //**********************************************************************************************
1092 
1093  //**Addition assignment to sparse vectors*******************************************************
1094  // No special implementation for the addition assignment to sparse vectors.
1095  //**********************************************************************************************
1096 
1097  //**Subtraction assignment to dense vectors*****************************************************
1110  template< typename VT1 > // Type of the target dense vector
1111  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1112  {
1114 
1115  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1116 
1117  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1118  return;
1119  }
1120 
1121  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1122  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1123 
1124  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1125  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1126  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1127  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1128 
1129  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1130  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1131  TDVecTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1132  else
1133  TDVecTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1134  }
1136  //**********************************************************************************************
1137 
1138  //**Default subtraction assignment to dense vectors*********************************************
1152  template< typename VT1 // Type of the left-hand side target vector
1153  , typename VT2 // Type of the left-hand side vector operand
1154  , typename MT1 > // Type of the right-hand side matrix operand
1155  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1156  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1157  {
1158  y.subAssign( x * A );
1159  }
1161  //**********************************************************************************************
1162 
1163  //**Vectorized default subtraction assignment to dense vectors**********************************
1177  template< typename VT1 // Type of the left-hand side target vector
1178  , typename VT2 // Type of the left-hand side vector operand
1179  , typename MT1 > // Type of the right-hand side matrix operand
1180  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1181  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1182  {
1183  typedef IntrinsicTrait<ElementType> IT;
1184 
1185  const size_t M( A.rows() );
1186  const size_t N( A.columns() );
1187 
1188  size_t j( 0UL );
1189 
1190  for( ; (j+8UL) <= N; j+=8UL ) {
1191  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1192  for( size_t i=0UL; i<M; i+=IT::size ) {
1193  const IntrinsicType x1( x.load(i) );
1194  xmm1 = xmm1 + x1 * A.load(i,j );
1195  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1196  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1197  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1198  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1199  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1200  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1201  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1202  }
1203  y[j ] -= sum( xmm1 );
1204  y[j+1UL] -= sum( xmm2 );
1205  y[j+2UL] -= sum( xmm3 );
1206  y[j+3UL] -= sum( xmm4 );
1207  y[j+4UL] -= sum( xmm5 );
1208  y[j+5UL] -= sum( xmm6 );
1209  y[j+6UL] -= sum( xmm7 );
1210  y[j+7UL] -= sum( xmm8 );
1211  }
1212  for( ; (j+4UL) <= N; j+=4UL ) {
1213  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1214  for( size_t i=0UL; i<M; i+=IT::size ) {
1215  const IntrinsicType x1( x.load(i) );
1216  xmm1 = xmm1 + x1 * A.load(i,j );
1217  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1218  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1219  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1220  }
1221  y[j ] -= sum( xmm1 );
1222  y[j+1UL] -= sum( xmm2 );
1223  y[j+2UL] -= sum( xmm3 );
1224  y[j+3UL] -= sum( xmm4 );
1225  }
1226  for( ; (j+3UL) <= N; j+=3UL ) {
1227  IntrinsicType xmm1, xmm2, xmm3;
1228  for( size_t i=0UL; i<M; i+=IT::size ) {
1229  const IntrinsicType x1( x.load(i) );
1230  xmm1 = xmm1 + x1 * A.load(i,j );
1231  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1232  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1233  }
1234  y[j ] -= sum( xmm1 );
1235  y[j+1UL] -= sum( xmm2 );
1236  y[j+2UL] -= sum( xmm3 );
1237  }
1238  for( ; (j+2UL) <= N; j+=2UL ) {
1239  IntrinsicType xmm1, xmm2;
1240  for( size_t i=0UL; i<M; i+=IT::size ) {
1241  const IntrinsicType x1( x.load(i) );
1242  xmm1 = xmm1 + x1 * A.load(i,j );
1243  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1244  }
1245  y[j ] -= sum( xmm1 );
1246  y[j+1UL] -= sum( xmm2 );
1247  }
1248  if( j < N ) {
1249  IntrinsicType xmm1;
1250  for( size_t i=0UL; i<M; i+=IT::size ) {
1251  xmm1 = xmm1 + A.load(i,j) * x.load(i);
1252  }
1253  y[j] -= sum( xmm1 );
1254  }
1255  }
1257  //**********************************************************************************************
1258 
1259  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1273  template< typename VT1 // Type of the left-hand side target vector
1274  , typename VT2 // Type of the left-hand side vector operand
1275  , typename MT1 > // Type of the right-hand side matrix operand
1276  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1277  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1278  {
1279  selectDefaultSubAssignKernel( y, x, A );
1280  }
1282  //**********************************************************************************************
1283 
1284  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1285 #if BLAZE_BLAS_MODE
1286 
1299  template< typename VT1 // Type of the left-hand side target vector
1300  , typename VT2 // Type of the left-hand side vector operand
1301  , typename MT1 > // Type of the right-hand side matrix operand
1302  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1303  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1304  {
1305  using boost::numeric_cast;
1306 
1310 
1311  const int M ( numeric_cast<int>( A.rows() ) );
1312  const int N ( numeric_cast<int>( A.columns() ) );
1313  const int lda( numeric_cast<int>( A.spacing() ) );
1314 
1315  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1316  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1317  }
1319 #endif
1320  //**********************************************************************************************
1321 
1322  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1323 #if BLAZE_BLAS_MODE
1324 
1337  template< typename VT1 // Type of the left-hand side target vector
1338  , typename VT2 // Type of the left-hand side vector operand
1339  , typename MT1 > // Type of the right-hand side matrix operand
1340  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1341  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1342  {
1343  using boost::numeric_cast;
1344 
1348 
1349  const int M ( numeric_cast<int>( A.rows() ) );
1350  const int N ( numeric_cast<int>( A.columns() ) );
1351  const int lda( numeric_cast<int>( A.spacing() ) );
1352 
1353  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1354  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1355  }
1357 #endif
1358  //**********************************************************************************************
1359 
1360  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1361 #if BLAZE_BLAS_MODE
1362 
1375  template< typename VT1 // Type of the left-hand side target vector
1376  , typename VT2 // Type of the left-hand side vector operand
1377  , typename MT1 > // Type of the right-hand side matrix operand
1378  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1379  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1380  {
1381  using boost::numeric_cast;
1382 
1386  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1387  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1388  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1389 
1390  const int M ( numeric_cast<int>( A.rows() ) );
1391  const int N ( numeric_cast<int>( A.columns() ) );
1392  const int lda( numeric_cast<int>( A.spacing() ) );
1393  const complex<float> alpha( -1.0F, 0.0F );
1394  const complex<float> beta ( 1.0F, 0.0F );
1395 
1396  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1397  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1398  }
1400 #endif
1401  //**********************************************************************************************
1402 
1403  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1404 #if BLAZE_BLAS_MODE
1405 
1418  template< typename VT1 // Type of the left-hand side target vector
1419  , typename VT2 // Type of the left-hand side vector operand
1420  , typename MT1 > // Type of the right-hand side matrix operand
1421  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1422  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1423  {
1424  using boost::numeric_cast;
1425 
1429  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1430  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1431  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1432 
1433  const int M ( numeric_cast<int>( A.rows() ) );
1434  const int N ( numeric_cast<int>( A.columns() ) );
1435  const int lda( numeric_cast<int>( A.spacing() ) );
1436  const complex<double> alpha( -1.0, 0.0 );
1437  const complex<double> beta ( 1.0, 0.0 );
1438 
1439  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1440  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1441  }
1443 #endif
1444  //**********************************************************************************************
1445 
1446  //**Subtraction assignment to sparse vectors****************************************************
1447  // No special implementation for the subtraction assignment to sparse vectors.
1448  //**********************************************************************************************
1449 
1450  //**Multiplication assignment to dense vectors**************************************************
1463  template< typename VT1 > // Type of the target dense vector
1464  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1465  {
1467 
1471 
1472  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1473 
1474  const ResultType tmp( rhs );
1475  multAssign( ~lhs, tmp );
1476  }
1478  //**********************************************************************************************
1479 
1480  //**Multiplication assignment to sparse vectors*************************************************
1481  // No special implementation for the multiplication assignment to sparse vectors.
1482  //**********************************************************************************************
1483 
1484  //**Compile time checks*************************************************************************
1491  //**********************************************************************************************
1492 };
1493 //*************************************************************************************************
1494 
1495 
1496 
1497 
1498 //=================================================================================================
1499 //
1500 // DVECSCALARMULTEXPR SPECIALIZATION
1501 //
1502 //=================================================================================================
1503 
1504 //*************************************************************************************************
1512 template< typename VT // Type of the left-hand side dense vector
1513  , typename MT // Type of the right-hand side dense matrix
1514  , typename ST > // Type of the side scalar value
1515 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
1516  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1517  , private VecScalarMultExpr
1518  , private Computation
1519 {
1520  private:
1521  //**Type definitions****************************************************************************
// Shorthand type aliases for the wrapped vector/matrix multiplication and its operands.
1522  typedef TDVecTDMatMultExpr<VT,MT> VMM; //!< Type of the wrapped vector/matrix multiplication expression.
1523  typedef typename VMM::ResultType RES; //!< Result type of the wrapped multiplication expression.
1524  typedef typename VT::ResultType VRT; //!< Result type of the left-hand side dense vector type VT.
1525  typedef typename MT::ResultType MRT; //!< Result type of the right-hand side dense matrix type MT.
1526  typedef typename VRT::ElementType VET; //!< Element type of VRT.
1527  typedef typename MRT::ElementType MET; //!< Element type of MRT.
1528  typedef typename VT::CompositeType VCT; //!< Composite type of the left-hand side dense vector type VT.
1529  typedef typename MT::CompositeType MCT; //!< Composite type of the right-hand side dense matrix type MT.
1530  //**********************************************************************************************
1531 
1532  //**********************************************************************************************
// Compile time switch derived from IsComputation<VT>. It is the first argument of the
// SelectType that defines the vector operand type LT (choice between const VRT and VCT).
1534  enum { evaluateVector = IsComputation<VT>::value };
1535  //**********************************************************************************************
1536 
1537  //**********************************************************************************************
// Compile time switch that is set only if MT is a computation, MT is not vectorizable,
// the vector and matrix element types (VET, MET) are identical, and that element type
// satisfies IsBlasCompatible. It is the first argument of the SelectType that defines the
// matrix operand type RT and also takes part in the run time kernel selection of the
// assignment functions.
1539  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
1540  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1541  //**********************************************************************************************
1542 
1543  //**********************************************************************************************
1545 
1548  template< typename T1, typename T2, typename T3, typename T4 >
1549  struct UseSinglePrecisionKernel {
1550  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1551  IsFloat<typename T1::ElementType>::value &&
1552  IsFloat<typename T2::ElementType>::value &&
1553  IsFloat<typename T3::ElementType>::value &&
1554  !IsComplex<T4>::value };
1555  };
1556  //**********************************************************************************************
1557 
1558  //**********************************************************************************************
1560 
1563  template< typename T1, typename T2, typename T3, typename T4 >
1564  struct UseDoublePrecisionKernel {
1565  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1566  IsDouble<typename T1::ElementType>::value &&
1567  IsDouble<typename T2::ElementType>::value &&
1568  IsDouble<typename T3::ElementType>::value &&
1569  !IsComplex<T4>::value };
1570  };
1571  //**********************************************************************************************
1572 
1573  //**********************************************************************************************
1575 
1578  template< typename T1, typename T2, typename T3 >
1579  struct UseSinglePrecisionComplexKernel {
1580  typedef complex<float> Type;
1581  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1582  IsSame<typename T1::ElementType,Type>::value &&
1583  IsSame<typename T2::ElementType,Type>::value &&
1584  IsSame<typename T3::ElementType,Type>::value };
1585  };
1586  //**********************************************************************************************
1587 
1588  //**********************************************************************************************
1590 
1593  template< typename T1, typename T2, typename T3 >
1594  struct UseDoublePrecisionComplexKernel {
1595  typedef complex<double> Type;
1596  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1597  IsSame<typename T1::ElementType,Type>::value &&
1598  IsSame<typename T2::ElementType,Type>::value &&
1599  IsSame<typename T3::ElementType,Type>::value };
1600  };
1601  //**********************************************************************************************
1602 
1603  //**********************************************************************************************
1605 
1607  template< typename T1, typename T2, typename T3, typename T4 >
1608  struct UseDefaultKernel {
1609  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1610  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1611  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1612  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1613  };
1614  //**********************************************************************************************
1615 
1616  //**********************************************************************************************
1618 
1621  template< typename T1, typename T2, typename T3, typename T4 >
1622  struct UseVectorizedDefaultKernel {
1623  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1624  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1625  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1626  IsSame<typename T1::ElementType,T4>::value &&
1627  IntrinsicTrait<typename T1::ElementType>::addition &&
1628  IntrinsicTrait<typename T1::ElementType>::multiplication };
1629  };
1630  //**********************************************************************************************
1631 
1632  public:
1633  //**Type definitions****************************************************************************
// Public type interface of the scaled vector/matrix multiplication expression.
1634  typedef DVecScalarMultExpr<VMM,ST,true> This; //!< Type of this DVecScalarMultExpr instance.
1635  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type, obtained from MultTrait for RES and the scalar type ST.
1636  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type of ResultType.
1637  typedef typename ResultType::ElementType ElementType; //!< Element type of ResultType.
1638  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type belonging to ElementType.
1639  typedef const ElementType ReturnType; //!< Return type of the subscript operator (a const element value).
1640  typedef const ResultType CompositeType; //!< Composite type: the expression is represented by its const result type.
1641 
// Type of the left-hand side operand, i.e. the wrapped vector/matrix multiplication.
1643  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;
1644 
// Type of the right-hand side operand, i.e. the scalar.
1646  typedef ST RightOperand;
1647 
// Type used to bind the vector operand inside the assignment functions; selected between
// const VRT and VCT by evaluateVector (presumably VRT when the switch is set -- confirm
// against SelectType).
1649  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
1650 
// Type used to bind the matrix operand inside the assignment functions; selected between
// const MRT and MCT by evaluateMatrix (presumably MRT when the switch is set -- confirm
// against SelectType).
1652  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
1653  //**********************************************************************************************
1654 
1655  //**Compilation flags***************************************************************************
// Compilation flag: this expression specialization reports itself as not vectorizable.
1657  enum { vectorizable = 0 };
1658 
// Compilation flag: smpAssignable is switched off for this expression specialization.
// NOTE(review): presumably this excludes the expression from parallel assignment -- confirm.
1660  enum { smpAssignable = 0 };
1661  //**********************************************************************************************
1662 
1663  //**Constructor*********************************************************************************
1669  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
1670  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1671  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1672  {}
1673  //**********************************************************************************************
1674 
1675  //**Subscript operator**************************************************************************
1681  inline ReturnType operator[]( size_t index ) const {
1682  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1683  return vector_[index] * scalar_;
1684  }
1685  //**********************************************************************************************
1686 
1687  //**Size function*******************************************************************************
1692  inline size_t size() const {
1693  return vector_.size();
1694  }
1695  //**********************************************************************************************
1696 
1697  //**Left operand access*************************************************************************
1702  inline LeftOperand leftOperand() const {
1703  return vector_;
1704  }
1705  //**********************************************************************************************
1706 
1707  //**Right operand access************************************************************************
1712  inline RightOperand rightOperand() const {
1713  return scalar_;
1714  }
1715  //**********************************************************************************************
1716 
1717  //**********************************************************************************************
1723  template< typename T >
1724  inline bool canAlias( const T* alias ) const {
1725  return vector_.canAlias( alias );
1726  }
1727  //**********************************************************************************************
1728 
1729  //**********************************************************************************************
1735  template< typename T >
1736  inline bool isAliased( const T* alias ) const {
1737  return vector_.isAliased( alias );
1738  }
1739  //**********************************************************************************************
1740 
1741  private:
1742  //**Member variables****************************************************************************
1743  LeftOperand vector_; //!< Left-hand side operand: the wrapped vector/matrix multiplication expression.
1744  RightOperand scalar_; //!< Right-hand side operand: the scalar of the multiplication expression.
1745  //**********************************************************************************************
1746 
1747  //**Assignment to dense vectors*****************************************************************
1759  template< typename VT1 // Type of the target dense vector
1760  , bool TF > // Transpose flag of the target dense vector
1761  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
1762  {
1764 
1765  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1766 
1767  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1768  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1769 
1770  if( right.rows() == 0UL ) {
1771  reset( ~lhs );
1772  return;
1773  }
1774  else if( right.columns() == 0UL ) {
1775  return;
1776  }
1777 
1778  LT x( left ); // Evaluation of the left-hand side dense vector operand
1779  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1780 
1781  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1782  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1783  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1784  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1785 
1786  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1787  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1788  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1789  else
1790  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1791  }
1792  //**********************************************************************************************
1793 
1794  //**Default assignment to dense vectors*********************************************************
1808  template< typename VT1 // Type of the left-hand side target vector
1809  , typename VT2 // Type of the left-hand side vector operand
1810  , typename MT1 // Type of the right-hand side matrix operand
1811  , typename ST2 > // Type of the scalar value
1812  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1813  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1814  {
1815  y.assign( x * A * scalar );
1816  }
1817  //**********************************************************************************************
1818 
1819  //**Vectorized default assignment to dense vectors**********************************************
1833  template< typename VT1 // Type of the left-hand side target vector
1834  , typename VT2 // Type of the left-hand side vector operand
1835  , typename MT1 // Type of the right-hand side matrix operand
1836  , typename ST2 > // Type of the scalar value
1837  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1838  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1839  {
1840  typedef IntrinsicTrait<ElementType> IT;
1841 
1842  const size_t M( A.rows() );
1843  const size_t N( A.columns() );
1844 
1845  size_t j( 0UL );
1846 
1847  for( ; (j+8UL) <= N; j+=8UL ) {
1848  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1849  for( size_t i=0UL; i<M; i+=IT::size ) {
1850  const IntrinsicType x1( x.load(i) );
1851  xmm1 = xmm1 + x1 * A.load(i,j );
1852  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1853  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1854  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1855  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1856  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1857  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1858  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1859  }
1860  y[j ] = sum( xmm1 ) * scalar;
1861  y[j+1UL] = sum( xmm2 ) * scalar;
1862  y[j+2UL] = sum( xmm3 ) * scalar;
1863  y[j+3UL] = sum( xmm4 ) * scalar;
1864  y[j+4UL] = sum( xmm5 ) * scalar;
1865  y[j+5UL] = sum( xmm6 ) * scalar;
1866  y[j+6UL] = sum( xmm7 ) * scalar;
1867  y[j+7UL] = sum( xmm8 ) * scalar;
1868  }
1869  for( ; (j+4UL) <= N; j+=4UL ) {
1870  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1871  for( size_t i=0UL; i<M; i+=IT::size ) {
1872  const IntrinsicType x1( x.load(i) );
1873  xmm1 = xmm1 + x1 * A.load(i,j );
1874  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1875  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1876  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1877  }
1878  y[j ] = sum( xmm1 ) * scalar;
1879  y[j+1UL] = sum( xmm2 ) * scalar;
1880  y[j+2UL] = sum( xmm3 ) * scalar;
1881  y[j+3UL] = sum( xmm4 ) * scalar;
1882  }
1883  for( ; (j+3UL) <= N; j+=3UL ) {
1884  IntrinsicType xmm1, xmm2, xmm3;
1885  for( size_t i=0UL; i<M; i+=IT::size ) {
1886  const IntrinsicType x1( x.load(i) );
1887  xmm1 = xmm1 + x1 * A.load(i,j );
1888  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1889  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1890  }
1891  y[j ] = sum( xmm1 ) * scalar;
1892  y[j+1UL] = sum( xmm2 ) * scalar;
1893  y[j+2UL] = sum( xmm3 ) * scalar;
1894  }
1895  for( ; (j+2UL) <= N; j+=2UL ) {
1896  IntrinsicType xmm1, xmm2;
1897  for( size_t i=0UL; i<M; i+=IT::size ) {
1898  const IntrinsicType x1( x.load(i) );
1899  xmm1 = xmm1 + x1 * A.load(i,j );
1900  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1901  }
1902  y[j ] = sum( xmm1 ) * scalar;
1903  y[j+1UL] = sum( xmm2 ) * scalar;
1904  }
1905  if( j < N ) {
1906  IntrinsicType xmm1;
1907  for( size_t i=0UL; i<M; i+=IT::size ) {
1908  xmm1 = xmm1 + A.load(i,j) * x.load(i);
1909  }
1910  y[j] = sum( xmm1 ) * scalar;
1911  }
1912  }
1913  //**********************************************************************************************
1914 
1915  //**BLAS-based assignment to dense vectors (default)********************************************
1928  template< typename VT1 // Type of the left-hand side target vector
1929  , typename VT2 // Type of the left-hand side vector operand
1930  , typename MT1 // Type of the right-hand side matrix operand
1931  , typename ST2 > // Type of the scalar value
1932  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1933  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1934  {
1935  selectDefaultAssignKernel( y, x, A, scalar );
1936  }
1937  //**********************************************************************************************
1938 
1939  //**BLAS-based assignment to dense vectors (single precision)***********************************
1940 #if BLAZE_BLAS_MODE
1941 
1954  template< typename VT1 // Type of the left-hand side target vector
1955  , typename VT2 // Type of the left-hand side vector operand
1956  , typename MT1 // Type of the right-hand side matrix operand
1957  , typename ST2 > // Type of the scalar value
1958  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1959  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1960  {
1961  using boost::numeric_cast;
1962 
1966 
1967  const int M ( numeric_cast<int>( A.rows() ) );
1968  const int N ( numeric_cast<int>( A.columns() ) );
1969  const int lda( numeric_cast<int>( A.spacing() ) );
1970 
1971  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
1972  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
1973  }
1974 #endif
1975  //**********************************************************************************************
1976 
1977  //**BLAS-based assignment to dense vectors (double precision)***********************************
1978 #if BLAZE_BLAS_MODE
1979 
1992  template< typename VT1 // Type of the left-hand side target vector
1993  , typename VT2 // Type of the left-hand side vector operand
1994  , typename MT1 // Type of the right-hand side matrix operand
1995  , typename ST2 > // Type of the scalar value
1996  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1997  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1998  {
1999  using boost::numeric_cast;
2000 
2004 
2005  const int M ( numeric_cast<int>( A.rows() ) );
2006  const int N ( numeric_cast<int>( A.columns() ) );
2007  const int lda( numeric_cast<int>( A.spacing() ) );
2008 
2009  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2010  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2011  }
2012 #endif
2013  //**********************************************************************************************
2014 
2015  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2016 #if BLAZE_BLAS_MODE
2017 
2031  template< typename VT1 // Type of the left-hand side target vector
2032  , typename VT2 // Type of the left-hand side vector operand
2033  , typename MT1 // Type of the right-hand side matrix operand
2034  , typename ST2 > // Type of the scalar value
2035  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2036  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2037  {
2038  using boost::numeric_cast;
2039 
2043  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2044  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2045  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2046 
2047  const int M ( numeric_cast<int>( A.rows() ) );
2048  const int N ( numeric_cast<int>( A.columns() ) );
2049  const int lda( numeric_cast<int>( A.spacing() ) );
2050  const complex<float> alpha( scalar );
2051  const complex<float> beta ( 0.0F, 0.0F );
2052 
2053  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2054  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2055  }
2056 #endif
2057  //**********************************************************************************************
2058 
2059  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2060 #if BLAZE_BLAS_MODE
2061 
2075  template< typename VT1 // Type of the left-hand side target vector
2076  , typename VT2 // Type of the left-hand side vector operand
2077  , typename MT1 // Type of the right-hand side matrix operand
2078  , typename ST2 > // Type of the scalar value
2079  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2080  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2081  {
2082  using boost::numeric_cast;
2083 
2087  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2088  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2089  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2090 
2091  const int M ( numeric_cast<int>( A.rows() ) );
2092  const int N ( numeric_cast<int>( A.columns() ) );
2093  const int lda( numeric_cast<int>( A.spacing() ) );
2094  const complex<double> alpha( scalar );
2095  const complex<double> beta ( 0.0, 0.0 );
2096 
2097  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2098  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2099  }
2100 #endif
2101  //**********************************************************************************************
2102 
2103  //**Assignment to sparse vectors****************************************************************
2115  template< typename VT1 // Type of the target sparse vector
2116  , bool TF > // Transpose flag of the target sparse vector
2117  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2118  {
2120 
2124 
2125  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2126 
2127  const ResultType tmp( rhs );
2128  assign( ~lhs, tmp );
2129  }
2130  //**********************************************************************************************
2131 
2132  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the expression rhs to the dense target vector lhs.
// Fetches the vector and matrix operands of rhs.vector_, returns without touching lhs when the
// matrix has no rows or no columns, evaluates both operands (LT x, RT A) and then selects a kernel.
// Both kernels receive the unwrapped target (~lhs), x, A and rhs.scalar_.
// NOTE(review): listing number 2148 is skipped right after the opening brace; a statement may be
// missing from this rendering.
2144  template< typename VT1 // Type of the target dense vector
2145  , bool TF > // Transpose flag of the target dense vector
2146  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2147  {
2149 
2150  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2151 
2152  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2153  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2154 
      // An empty matrix operand contributes nothing to the target.
2155  if( right.rows() == 0UL || right.columns() == 0UL ) {
2156  return;
2157  }
2158 
2159  LT x( left ); // Evaluation of the left-hand side dense vector operand
2160  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2161 
2162  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2163  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2164  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2165  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2166 
      // Default kernel when MT is a computation that is not pre-evaluated, or when the element
      // count of A is below TDVECTDMATMULT_THRESHOLD; BLAS kernel otherwise.
2167  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2168  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2169  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2170  else
2171  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2172  }
2173  //**********************************************************************************************
2174 
2175  //**Default addition assignment to dense vectors************************************************
// Non-vectorized default kernel of the addition assignment.
// This overload is removed from overload resolution (DisableIf) whenever
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> applies. It forwards the scaled product
// x * A * scalar to y.addAssign().
2189  template< typename VT1 // Type of the left-hand side target vector
2190  , typename VT2 // Type of the left-hand side vector operand
2191  , typename MT1 // Type of the right-hand side matrix operand
2192  , typename ST2 > // Type of the scalar value
2193  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2194  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2195  {
2196  y.addAssign( x * A * scalar );
2197  }
2198  //**********************************************************************************************
2199 
2200  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel of the addition assignment, enabled (EnableIf) when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> applies.
// The columns of A are processed in groups of 8, 4, 3 and 2, followed by a single trailing column.
// For every column j the products of x.load(i) and A.load(i,j) are accumulated over i in steps of
// IT::size, reduced with sum(), multiplied by scalar and added to y[j].
// NOTE(review): the accumulators are declared without an initializer - presumably IntrinsicType
// default-constructs to zero; confirm.
// NOTE(review): the i loops advance by IT::size up to M without a remainder loop - presumably the
// operands' storage is padded accordingly; confirm.
2214  template< typename VT1 // Type of the left-hand side target vector
2215  , typename VT2 // Type of the left-hand side vector operand
2216  , typename MT1 // Type of the right-hand side matrix operand
2217  , typename ST2 > // Type of the scalar value
2218  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2219  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2220  {
2221  typedef IntrinsicTrait<ElementType> IT;
2222 
2223  const size_t M( A.rows() );
2224  const size_t N( A.columns() );
2225 
      // j is shared by all loops below: each loop continues where the previous one stopped.
2226  size_t j( 0UL );
2227 
      // Eight columns per iteration; x.load(i) is loaded once and reused for all eight columns.
2228  for( ; (j+8UL) <= N; j+=8UL ) {
2229  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2230  for( size_t i=0UL; i<M; i+=IT::size ) {
2231  const IntrinsicType x1( x.load(i) );
2232  xmm1 = xmm1 + x1 * A.load(i,j );
2233  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2234  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2235  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2236  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2237  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2238  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2239  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2240  }
2241  y[j ] += sum( xmm1 ) * scalar;
2242  y[j+1UL] += sum( xmm2 ) * scalar;
2243  y[j+2UL] += sum( xmm3 ) * scalar;
2244  y[j+3UL] += sum( xmm4 ) * scalar;
2245  y[j+4UL] += sum( xmm5 ) * scalar;
2246  y[j+5UL] += sum( xmm6 ) * scalar;
2247  y[j+6UL] += sum( xmm7 ) * scalar;
2248  y[j+7UL] += sum( xmm8 ) * scalar;
2249  }
      // Four columns per iteration.
2250  for( ; (j+4UL) <= N; j+=4UL ) {
2251  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2252  for( size_t i=0UL; i<M; i+=IT::size ) {
2253  const IntrinsicType x1( x.load(i) );
2254  xmm1 = xmm1 + x1 * A.load(i,j );
2255  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2256  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2257  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2258  }
2259  y[j ] += sum( xmm1 ) * scalar;
2260  y[j+1UL] += sum( xmm2 ) * scalar;
2261  y[j+2UL] += sum( xmm3 ) * scalar;
2262  y[j+3UL] += sum( xmm4 ) * scalar;
2263  }
      // Three columns per iteration.
2264  for( ; (j+3UL) <= N; j+=3UL ) {
2265  IntrinsicType xmm1, xmm2, xmm3;
2266  for( size_t i=0UL; i<M; i+=IT::size ) {
2267  const IntrinsicType x1( x.load(i) );
2268  xmm1 = xmm1 + x1 * A.load(i,j );
2269  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2270  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2271  }
2272  y[j ] += sum( xmm1 ) * scalar;
2273  y[j+1UL] += sum( xmm2 ) * scalar;
2274  y[j+2UL] += sum( xmm3 ) * scalar;
2275  }
      // Two columns per iteration.
2276  for( ; (j+2UL) <= N; j+=2UL ) {
2277  IntrinsicType xmm1, xmm2;
2278  for( size_t i=0UL; i<M; i+=IT::size ) {
2279  const IntrinsicType x1( x.load(i) );
2280  xmm1 = xmm1 + x1 * A.load(i,j );
2281  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2282  }
2283  y[j ] += sum( xmm1 ) * scalar;
2284  y[j+1UL] += sum( xmm2 ) * scalar;
2285  }
      // At most one column is left at this point.
2286  if( j < N ) {
2287  IntrinsicType xmm1;
2288  for( size_t i=0UL; i<M; i+=IT::size ) {
2289  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2290  }
2291  y[j] += sum( xmm1 ) * scalar;
2292  }
2293  }
2294  //**********************************************************************************************
2295 
2296  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback member of the BLAS addition-assignment kernel family, enabled (EnableIf) through
// UseDefaultKernel<VT1,VT2,MT1,ST2>. It makes no BLAS call and forwards all arguments unchanged
// to selectDefaultAddAssignKernel().
2310  template< typename VT1 // Type of the left-hand side target vector
2311  , typename VT2 // Type of the left-hand side vector operand
2312  , typename MT1 // Type of the right-hand side matrix operand
2313  , typename ST2 > // Type of the scalar value
2314  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2315  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2316  {
2317  selectDefaultAddAssignKernel( y, x, A, scalar );
2318  }
2319  //**********************************************************************************************
2320 
2321  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2322 #if BLAZE_BLAS_MODE
2323 
// Single precision BLAS kernel of the addition assignment, enabled (EnableIf) through
// UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>.
// Calls cblas_sgemv with CblasColMajor/CblasTrans, passing scalar as alpha and 1.0F as beta; the
// unit beta keeps the current content of y in the result.
// NOTE(review): listing numbers 2345-2347 are skipped inside the body; statements may be missing
// from this rendering.
2336  template< typename VT1 // Type of the left-hand side target vector
2337  , typename VT2 // Type of the left-hand side vector operand
2338  , typename MT1 // Type of the right-hand side matrix operand
2339  , typename ST2 > // Type of the scalar value
2340  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2341  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2342  {
2343  using boost::numeric_cast;
2344 
2348 
      // The CBLAS interface takes int dimensions; A.spacing() is passed as the leading dimension.
2349  const int M ( numeric_cast<int>( A.rows() ) );
2350  const int N ( numeric_cast<int>( A.columns() ) );
2351  const int lda( numeric_cast<int>( A.spacing() ) );
2352 
2353  cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2354  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2355  }
2356 #endif
2357  //**********************************************************************************************
2358 
2359  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2360 #if BLAZE_BLAS_MODE
2361 
// Double precision BLAS kernel of the addition assignment, enabled (EnableIf) through
// UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>.
// Calls cblas_dgemv with CblasColMajor/CblasTrans, passing scalar as alpha and 1.0 as beta; the
// unit beta keeps the current content of y in the result.
// NOTE(review): listing numbers 2383-2385 are skipped inside the body; statements may be missing
// from this rendering.
2374  template< typename VT1 // Type of the left-hand side target vector
2375  , typename VT2 // Type of the left-hand side vector operand
2376  , typename MT1 // Type of the right-hand side matrix operand
2377  , typename ST2 > // Type of the scalar value
2378  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2379  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2380  {
2381  using boost::numeric_cast;
2382 
2386 
      // The CBLAS interface takes int dimensions; A.spacing() is passed as the leading dimension.
2387  const int M ( numeric_cast<int>( A.rows() ) );
2388  const int N ( numeric_cast<int>( A.columns() ) );
2389  const int lda( numeric_cast<int>( A.spacing() ) );
2390 
2391  cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2392  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2393  }
2394 #endif
2395  //**********************************************************************************************
2396 
2397  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2398 #if BLAZE_BLAS_MODE
2399 
// Single precision complex BLAS kernel of the addition assignment, enabled (EnableIf) through
// UseSinglePrecisionComplexKernel<VT1,VT2,MT1>.
// Calls cblas_cgemv with CblasColMajor/CblasTrans; alpha is built from scalar and beta is (1,0),
// so the current content of y is kept in the result. Both factors are passed by address.
// NOTE(review): listing numbers 2422-2424 are skipped inside the body; statements may be missing
// from this rendering.
2413  template< typename VT1 // Type of the left-hand side target vector
2414  , typename VT2 // Type of the left-hand side vector operand
2415  , typename MT1 // Type of the right-hand side matrix operand
2416  , typename ST2 > // Type of the scalar value
2417  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2418  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2419  {
2420  using boost::numeric_cast;
2421 
      // Compile time checks on the value_type of all three element types.
2425  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2426  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2427  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2428 
2429  const int M ( numeric_cast<int>( A.rows() ) );
2430  const int N ( numeric_cast<int>( A.columns() ) );
2431  const int lda( numeric_cast<int>( A.spacing() ) );
2432  const complex<float> alpha( scalar );
2433  const complex<float> beta ( 1.0F, 0.0F );
2434 
2435  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2436  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2437  }
2438 #endif
2439  //**********************************************************************************************
2440 
2441  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2442 #if BLAZE_BLAS_MODE
2443 
// Double precision complex BLAS kernel of the addition assignment, enabled (EnableIf) through
// UseDoublePrecisionComplexKernel<VT1,VT2,MT1>.
// Calls cblas_zgemv with CblasColMajor/CblasTrans; alpha is built from scalar and beta is (1,0),
// so the current content of y is kept in the result. Both factors are passed by address.
// NOTE(review): listing numbers 2466-2468 are skipped inside the body; statements may be missing
// from this rendering.
2457  template< typename VT1 // Type of the left-hand side target vector
2458  , typename VT2 // Type of the left-hand side vector operand
2459  , typename MT1 // Type of the right-hand side matrix operand
2460  , typename ST2 > // Type of the scalar value
2461  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2462  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2463  {
2464  using boost::numeric_cast;
2465 
      // Compile time checks on the value_type of all three element types.
2469  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2470  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2471  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2472 
2473  const int M ( numeric_cast<int>( A.rows() ) );
2474  const int N ( numeric_cast<int>( A.columns() ) );
2475  const int lda( numeric_cast<int>( A.spacing() ) );
2476  const complex<double> alpha( scalar );
2477  const complex<double> beta ( 1.0, 0.0 );
2478 
2479  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2480  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2481  }
2482 #endif
2483  //**********************************************************************************************
2484 
2485  //**Addition assignment to sparse vectors*******************************************************
2486  // No special implementation for the addition assignment to sparse vectors.
2487  //**********************************************************************************************
2488 
2489  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the expression rhs to the dense target vector lhs.
// Fetches the vector and matrix operands of rhs.vector_, returns without touching lhs when the
// matrix has no rows or no columns, evaluates both operands (LT x, RT A) and then selects a kernel.
// Both kernels receive the unwrapped target (~lhs), x, A and rhs.scalar_.
// NOTE(review): listing number 2505 is skipped right after the opening brace; a statement may be
// missing from this rendering.
2501  template< typename VT1 // Type of the target dense vector
2502  , bool TF > // Transpose flag of the target dense vector
2503  friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2504  {
2506 
2507  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2508 
2509  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2510  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2511 
      // An empty matrix operand contributes nothing to the target.
2512  if( right.rows() == 0UL || right.columns() == 0UL ) {
2513  return;
2514  }
2515 
2516  LT x( left ); // Evaluation of the left-hand side dense vector operand
2517  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2518 
2519  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2520  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2521  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2522  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2523 
      // Default kernel when MT is a computation that is not pre-evaluated, or when the element
      // count of A is below TDVECTDMATMULT_THRESHOLD; BLAS kernel otherwise.
2524  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2525  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
2526  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2527  else
2528  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2529  }
2530  //**********************************************************************************************
2531 
2532  //**Default subtraction assignment to dense vectors*********************************************
// Non-vectorized default kernel of the subtraction assignment.
// This overload is removed from overload resolution (DisableIf) whenever
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> applies. It forwards the scaled product
// x * A * scalar to y.subAssign().
2546  template< typename VT1 // Type of the left-hand side target vector
2547  , typename VT2 // Type of the left-hand side vector operand
2548  , typename MT1 // Type of the right-hand side matrix operand
2549  , typename ST2 > // Type of the scalar value
2550  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2551  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2552  {
2553  y.subAssign( x * A * scalar );
2554  }
2555  //**********************************************************************************************
2556 
2557  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel of the subtraction assignment, enabled (EnableIf) when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> applies.
// The columns of A are processed in groups of 8, 4, 3 and 2, followed by a single trailing column.
// For every column j the products of x.load(i) and A.load(i,j) are accumulated over i in steps of
// IT::size, reduced with sum(), multiplied by scalar and subtracted from y[j].
// NOTE(review): the accumulators are declared without an initializer - presumably IntrinsicType
// default-constructs to zero; confirm.
// NOTE(review): the i loops advance by IT::size up to M without a remainder loop - presumably the
// operands' storage is padded accordingly; confirm.
2571  template< typename VT1 // Type of the left-hand side target vector
2572  , typename VT2 // Type of the left-hand side vector operand
2573  , typename MT1 // Type of the right-hand side matrix operand
2574  , typename ST2 > // Type of the scalar value
2575  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2576  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2577  {
2578  typedef IntrinsicTrait<ElementType> IT;
2579 
2580  const size_t M( A.rows() );
2581  const size_t N( A.columns() );
2582 
      // j is shared by all loops below: each loop continues where the previous one stopped.
2583  size_t j( 0UL );
2584 
      // Eight columns per iteration; x.load(i) is loaded once and reused for all eight columns.
2585  for( ; (j+8UL) <= N; j+=8UL ) {
2586  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2587  for( size_t i=0UL; i<M; i+=IT::size ) {
2588  const IntrinsicType x1( x.load(i) );
2589  xmm1 = xmm1 + x1 * A.load(i,j );
2590  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2591  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2592  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2593  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2594  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2595  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2596  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2597  }
2598  y[j ] -= sum( xmm1 ) * scalar;
2599  y[j+1UL] -= sum( xmm2 ) * scalar;
2600  y[j+2UL] -= sum( xmm3 ) * scalar;
2601  y[j+3UL] -= sum( xmm4 ) * scalar;
2602  y[j+4UL] -= sum( xmm5 ) * scalar;
2603  y[j+5UL] -= sum( xmm6 ) * scalar;
2604  y[j+6UL] -= sum( xmm7 ) * scalar;
2605  y[j+7UL] -= sum( xmm8 ) * scalar;
2606  }
      // Four columns per iteration.
2607  for( ; (j+4UL) <= N; j+=4UL ) {
2608  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2609  for( size_t i=0UL; i<M; i+=IT::size ) {
2610  const IntrinsicType x1( x.load(i) );
2611  xmm1 = xmm1 + x1 * A.load(i,j );
2612  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2613  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2614  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2615  }
2616  y[j ] -= sum( xmm1 ) * scalar;
2617  y[j+1UL] -= sum( xmm2 ) * scalar;
2618  y[j+2UL] -= sum( xmm3 ) * scalar;
2619  y[j+3UL] -= sum( xmm4 ) * scalar;
2620  }
      // Three columns per iteration.
2621  for( ; (j+3UL) <= N; j+=3UL ) {
2622  IntrinsicType xmm1, xmm2, xmm3;
2623  for( size_t i=0UL; i<M; i+=IT::size ) {
2624  const IntrinsicType x1( x.load(i) );
2625  xmm1 = xmm1 + x1 * A.load(i,j );
2626  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2627  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2628  }
2629  y[j ] -= sum( xmm1 ) * scalar;
2630  y[j+1UL] -= sum( xmm2 ) * scalar;
2631  y[j+2UL] -= sum( xmm3 ) * scalar;
2632  }
      // Two columns per iteration.
2633  for( ; (j+2UL) <= N; j+=2UL ) {
2634  IntrinsicType xmm1, xmm2;
2635  for( size_t i=0UL; i<M; i+=IT::size ) {
2636  const IntrinsicType x1( x.load(i) );
2637  xmm1 = xmm1 + x1 * A.load(i,j );
2638  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2639  }
2640  y[j ] -= sum( xmm1 ) * scalar;
2641  y[j+1UL] -= sum( xmm2 ) * scalar;
2642  }
      // At most one column is left at this point.
2643  if( j < N ) {
2644  IntrinsicType xmm1;
2645  for( size_t i=0UL; i<M; i+=IT::size ) {
2646  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2647  }
2648  y[j] -= sum( xmm1 ) * scalar;
2649  }
2650  }
2651  //**********************************************************************************************
2652 
2653  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback member of the BLAS subtraction-assignment kernel family, enabled (EnableIf) through
// UseDefaultKernel<VT1,VT2,MT1,ST2>. It makes no BLAS call and forwards all arguments unchanged
// to selectDefaultSubAssignKernel().
2668  template< typename VT1 // Type of the left-hand side target vector
2669  , typename VT2 // Type of the left-hand side vector operand
2670  , typename MT1 // Type of the right-hand side matrix operand
2671  , typename ST2 > // Type of the scalar value
2672  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2673  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2674  {
2675  selectDefaultSubAssignKernel( y, x, A, scalar );
2676  }
2677  //**********************************************************************************************
2678 
2679  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2680 #if BLAZE_BLAS_MODE
2681 
// Single precision BLAS kernel of the subtraction assignment, enabled (EnableIf) through
// UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>.
// Calls cblas_sgemv with CblasColMajor/CblasTrans, passing the negated scalar as alpha and 1.0F
// as beta, so the scaled product is subtracted from the current content of y.
// NOTE(review): listing numbers 2703-2705 are skipped inside the body; statements may be missing
// from this rendering.
2694  template< typename VT1 // Type of the left-hand side target vector
2695  , typename VT2 // Type of the left-hand side vector operand
2696  , typename MT1 // Type of the right-hand side matrix operand
2697  , typename ST2 > // Type of the scalar value
2698  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2699  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2700  {
2701  using boost::numeric_cast;
2702 
2706 
      // The CBLAS interface takes int dimensions; A.spacing() is passed as the leading dimension.
2707  const int M ( numeric_cast<int>( A.rows() ) );
2708  const int N ( numeric_cast<int>( A.columns() ) );
2709  const int lda( numeric_cast<int>( A.spacing() ) );
2710 
2711  cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2712  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2713  }
2714 #endif
2715  //**********************************************************************************************
2716 
2717  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2718 #if BLAZE_BLAS_MODE
2719 
// Double precision BLAS kernel of the subtraction assignment, enabled (EnableIf) through
// UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>.
// Calls cblas_dgemv with CblasColMajor/CblasTrans, passing the negated scalar as alpha and 1.0
// as beta, so the scaled product is subtracted from the current content of y.
// NOTE(review): listing numbers 2741-2743 are skipped inside the body; statements may be missing
// from this rendering.
2732  template< typename VT1 // Type of the left-hand side target vector
2733  , typename VT2 // Type of the left-hand side vector operand
2734  , typename MT1 // Type of the right-hand side matrix operand
2735  , typename ST2 > // Type of the scalar value
2736  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2737  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2738  {
2739  using boost::numeric_cast;
2740 
2744 
      // The CBLAS interface takes int dimensions; A.spacing() is passed as the leading dimension.
2745  const int M ( numeric_cast<int>( A.rows() ) );
2746  const int N ( numeric_cast<int>( A.columns() ) );
2747  const int lda( numeric_cast<int>( A.spacing() ) );
2748 
2749  cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2750  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2751  }
2752 #endif
2753  //**********************************************************************************************
2754 
2755  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2756 #if BLAZE_BLAS_MODE
2757 
// Single precision complex BLAS kernel of the subtraction assignment, enabled (EnableIf) through
// UseSinglePrecisionComplexKernel<VT1,VT2,MT1>.
// Calls cblas_cgemv with CblasColMajor/CblasTrans; alpha is built from the negated scalar and
// beta is (1,0), so the scaled product is subtracted from the current content of y. Both factors
// are passed by address.
// NOTE(review): listing numbers 2781-2783 are skipped inside the body; statements may be missing
// from this rendering.
2772  template< typename VT1 // Type of the left-hand side target vector
2773  , typename VT2 // Type of the left-hand side vector operand
2774  , typename MT1 // Type of the right-hand side matrix operand
2775  , typename ST2 > // Type of the scalar value
2776  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2777  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2778  {
2779  using boost::numeric_cast;
2780 
      // Compile time checks on the value_type of all three element types.
2784  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2785  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2786  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2787 
2788  const int M ( numeric_cast<int>( A.rows() ) );
2789  const int N ( numeric_cast<int>( A.columns() ) );
2790  const int lda( numeric_cast<int>( A.spacing() ) );
2791  const complex<float> alpha( -scalar );
2792  const complex<float> beta ( 1.0F, 0.0F );
2793 
2794  cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2795  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2796  }
2797 #endif
2798  //**********************************************************************************************
2799 
2800  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2801 #if BLAZE_BLAS_MODE
2802 
// Double precision complex BLAS kernel of the subtraction assignment, enabled (EnableIf) through
// UseDoublePrecisionComplexKernel<VT1,VT2,MT1>.
// Calls cblas_zgemv with CblasColMajor/CblasTrans; alpha is built from the negated scalar and
// beta is (1,0), so the scaled product is subtracted from the current content of y. Both factors
// are passed by address.
// NOTE(review): listing numbers 2826-2828 are skipped inside the body; statements may be missing
// from this rendering.
2817  template< typename VT1 // Type of the left-hand side target vector
2818  , typename VT2 // Type of the left-hand side vector operand
2819  , typename MT1 // Type of the right-hand side matrix operand
2820  , typename ST2 > // Type of the scalar value
2821  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2822  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2823  {
2824  using boost::numeric_cast;
2825 
      // Compile time checks on the value_type of all three element types.
2829  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2830  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2831  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2832 
2833  const int M ( numeric_cast<int>( A.rows() ) );
2834  const int N ( numeric_cast<int>( A.columns() ) );
2835  const int lda( numeric_cast<int>( A.spacing() ) );
2836  const complex<double> alpha( -scalar );
2837  const complex<double> beta ( 1.0, 0.0 );
2838 
2839  cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2840  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2841  }
2842 #endif
2843  //**********************************************************************************************
2844 
2845  //**Subtraction assignment to sparse vectors****************************************************
2846  // No special implementation for the subtraction assignment to sparse vectors.
2847  //**********************************************************************************************
2848 
2849  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the expression rhs to the dense target vector lhs.
// The expression is first evaluated into a temporary of type ResultType; that temporary is then
// handed to multAssign() together with the unwrapped target (~lhs).
// NOTE(review): the listing numbers skip 2865 and 2867-2869 inside the body, so statements may be
// missing from this rendering.
2861  template< typename VT1 // Type of the target dense vector
2862  , bool TF > // Transpose flag of the target dense vector
2863  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
2864  {
2866 
2870 
2871  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2872 
      // Evaluate the whole expression once, then multiply the target with the evaluated result.
2873  const ResultType tmp( rhs );
2874  multAssign( ~lhs, tmp );
2875  }
2876  //**********************************************************************************************
2877 
2878  //**Multiplication assignment to sparse vectors*******************************************************
2879  // No special implementation for the multiplication assignment to sparse vectors.
2880  //**********************************************************************************************
2881 
2882  //**Compile time checks*************************************************************************
2891  //**********************************************************************************************
2892 };
2894 //*************************************************************************************************
2895 
2896 
2897 
2898 
2899 //=================================================================================================
2900 //
2901 // GLOBAL BINARY ARITHMETIC OPERATORS
2902 //
2903 //=================================================================================================
2904 
2905 //*************************************************************************************************
// Multiplication operator for a dense vector (left) and a dense matrix (right).
// Returns a TDVecTDMatMultExpr expression object built from the unwrapped operands; the return
// type is disabled (DisableIf) when T2 is itself a matrix/matrix multiplication expression.
// Throws std::invalid_argument when the vector size differs from the number of matrix rows.
// The declarator (listing line 2939) was absent from this listing and has been restored; the
// parameter names vec and mat are the ones the body uses.
// NOTE(review): the operand types DenseVector<T1,true> / DenseMatrix<T2,true> follow the upstream
// Blaze header - confirm against the original file.
// NOTE(review): listing number 2941 is still skipped after the opening brace; a statement may be
// missing from this rendering.
2936 template< typename T1 // Type of the left-hand side dense vector
2937  , typename T2 > // Type of the right-hand side dense matrix
2938 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
2939  operator*( const DenseVector<T1,true>& vec, const DenseMatrix<T2,true>& mat )
2940 {
2942 
      // The vector must provide exactly one element per matrix row.
2943  if( (~vec).size() != (~mat).rows() )
2944  throw std::invalid_argument( "Vector and matrix sizes do not match" );
2945 
2946  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
2947 }
2948 //*************************************************************************************************
2949 
2950 
2951 
2952 
2953 //=================================================================================================
2954 //
2955 // EXPRESSION TRAIT SPECIALIZATIONS
2956 //
2957 //=================================================================================================
2958 
2959 //*************************************************************************************************
// SubvectorExprTrait specialization for TDVecTDMatMultExpr<VT,MT>.
// The nested Type is the MultExprTrait result for VT and the SubmatrixExprTrait type of const MT,
// i.e. the subvector of the product is expressed through a submatrix of the matrix operand.
2961 template< typename VT, typename MT >
2962 struct SubvectorExprTrait< TDVecTDMatMultExpr<VT,MT> >
2963 {
2964  public:
2965  //**********************************************************************************************
2966  typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT>::Type >::Type Type;
2967  //**********************************************************************************************
2968 };
2970 //*************************************************************************************************
2971 
2972 } // namespace blaze
2973 
2974 #endif
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:237
Expression object for transpose dense vector-transpose dense matrix multiplications. The TDVecTDMatMultExpr class represents the compile time expression for multiplications between transpose dense vectors and column-major dense matrices.
Definition: Forward.h:131
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecTDMatMultExpr.h:358
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:315
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDVecTDMatMultExpr.h:325
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:3703
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the IsSame and IsStrictlySame type traits.
const size_t TDVECTDMATMULT_THRESHOLD
Dense Vector/column-major dense matrix multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:102
Constraint on the data type.
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:114
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:112
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecTDMatMultExpr.h:246
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:240
Header file for the RequiresEvaluation type trait.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:250
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecTDMatMultExpr.h:337
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecTDMatMultExpr.h:231
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
TDVecTDMatMultExpr< VT, MT > This
Type of this TDVecTDMatMultExpr instance.
Definition: TDVecTDMatMultExpr.h:228
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:113
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecTDMatMultExpr.h:232
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:243
Constraints on the storage order of matrix types.
Constraint on the data type.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:110
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:109
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecTDMatMultExpr.h:278
Header file for the EnableIf class template.
Header file for the IsNumeric type trait.
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:356
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecTDMatMultExpr.h:234
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Header file for the TVecMatMultExpr base class.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:233
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:111
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:230
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:357
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecTDMatMultExpr.h:349
Header file for the IsComputation type trait class.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:229
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
TDVecTDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecTDMatMultExpr class.
Definition: TDVecTDMatMultExpr.h:263
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:138
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecTDMatMultExpr.h:305
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.