All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
66 #include <blaze/system/BLAS.h>
68 #include <blaze/util/Assert.h>
69 #include <blaze/util/Complex.h>
75 #include <blaze/util/DisableIf.h>
76 #include <blaze/util/EnableIf.h>
78 #include <blaze/util/SelectType.h>
79 #include <blaze/util/Types.h>
85 
86 
87 namespace blaze {
88 
89 //=================================================================================================
90 //
91 // CLASS TDMATDVECMULTEXPR
92 //
93 //=================================================================================================
94 
95 //*************************************************************************************************
102 template< typename MT // Type of the left-hand side dense matrix
103  , typename VT > // Type of the right-hand side dense vector
104 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
105  , private MatVecMultExpr
106  , private Computation
107 {
108  private:
109  //**Type definitions****************************************************************************
// Private shorthand aliases for the types nested in the two operand types MT and VT.
110  typedef typename MT::ResultType MRT;  // ResultType of the matrix operand type MT
111  typedef typename VT::ResultType VRT;  // ResultType of the vector operand type VT
112  typedef typename MRT::ElementType MET;  // ElementType of MRT
113  typedef typename VRT::ElementType VET;  // ElementType of VRT
114  typedef typename MT::CompositeType MCT;  // CompositeType of the matrix operand type MT
115  typedef typename VT::CompositeType VCT;  // CompositeType of the vector operand type VT
116  //**********************************************************************************************
117 
118  //**********************************************************************************************
// Compile-time flag read by UseSMPAssign, smpAssignable, canSMPAssign() and the kernel
// selection functions; presumably it marks that the matrix operand has to be evaluated
// before the multiplication.
// NOTE(review): the tail of this expression is not visible in this listing. Only the leading
// conjunction (MT is a computation and MET/VET are the same type) can be confirmed from here.
120  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
126  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
127  //**********************************************************************************************
128 
129  //**********************************************************************************************
131 
135  template< typename T1 >
136  struct UseSMPAssign {
137  enum { value = ( evaluateMatrix || evaluateVector ) };
138  };
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144 
// Helper whose nested `value` is the EnableIf condition of the cblas_sgemv-based kernel
// overloads.
// NOTE(review): the enum expression is truncated in this listing; only the requirement that
// T1, T2 and T3 are all vectorizable is visible here.
148  template< typename T1, typename T2, typename T3 >
149  struct UseSinglePrecisionKernel {
150  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
// Helper whose nested `value` is the EnableIf condition of the cblas_dgemv-based kernel
// overloads.
// NOTE(review): the enum expression is truncated in this listing; only the requirement that
// T1, T2 and T3 are all vectorizable is visible here.
164  template< typename T1, typename T2, typename T3 >
165  struct UseDoublePrecisionKernel {
166  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
// Helper whose nested `value` is the EnableIf condition of the cblas_cgemv-based kernel
// overloads; `Type` names the complex<float> element type.
// NOTE(review): the enum expression is truncated in this listing; only the requirement that
// T1, T2 and T3 are all vectorizable is visible here.
180  template< typename T1, typename T2, typename T3 >
181  struct UseSinglePrecisionComplexKernel {
182  typedef complex<float> Type;
183  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
// Helper whose nested `value` is the EnableIf condition of the cblas_zgemv-based kernel
// overloads; `Type` names the complex<double> element type.
// NOTE(review): the enum expression is truncated in this listing; only the requirement that
// T1, T2 and T3 are all vectorizable is visible here.
197  template< typename T1, typename T2, typename T3 >
198  struct UseDoublePrecisionComplexKernel {
199  typedef complex<double> Type;
200  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
213  template< typename T1, typename T2, typename T3 >
214  struct UseDefaultKernel {
215  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
219  };
221  //**********************************************************************************************
222 
223  //**********************************************************************************************
225 
// Helper whose nested `value` switches between the scalar (DisableIf) and the vectorized
// (EnableIf) overloads of selectDefaultAssignKernel()/selectDefaultAddAssignKernel().
// NOTE(review): the enum expression is truncated in this listing; only the requirement that
// T1, T2 and T3 are all vectorizable is visible here.
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
236  };
238  //**********************************************************************************************
239 
240  public:
241  //**Type definitions****************************************************************************
// Return type of the subscript operator: a const ElementType value.
// NOTE(review): ElementType and ResultType are declared on lines missing from this listing.
247  typedef const ElementType ReturnType;
// Type used when this expression is passed on as an operand: a const ResultType.
248  typedef const ResultType CompositeType;
249 
// Type of the matrix operand as handled by leftOperand(). Presumably SelectType yields
// `const MT` when MT is an expression and `const MT&` otherwise — TODO confirm.
251  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
252 
// Type of the vector operand as handled by rightOperand(); same selection rule applied to VT.
254  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
255 
258 
261  //**********************************************************************************************
262 
263  //**Compilation flags***************************************************************************
// Compilation flag `vectorizable` of the expression.
// NOTE(review): the tail of this expression is not visible in this listing; only the
// requirement that both MT and VT are vectorizable can be confirmed from here.
265  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
269 
271  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
272  !evaluateVector && VT::smpAssignable };
273  //**********************************************************************************************
274 
275  //**Constructor*********************************************************************************
281  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
282  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
283  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
284  , end_( ( (mat.columns()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
285  {
286  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
287  }
288  //**********************************************************************************************
289 
290  //**Subscript operator**************************************************************************
296  inline ReturnType operator[]( size_t index ) const {
297  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
298 
299  ElementType res;
300 
301  if( mat_.columns() != 0UL ) {
302  res = mat_(index,0UL) * vec_[0UL];
303  for( size_t j=1UL; j<end_; j+=2UL ) {
304  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
305  }
306  if( end_ < mat_.columns() ) {
307  res += mat_(index,end_) * vec_[end_];
308  }
309  }
310  else {
311  reset( res );
312  }
313 
314  return res;
315  }
316  //**********************************************************************************************
317 
318  //**Size function*******************************************************************************
323  inline size_t size() const {
324  return mat_.rows();
325  }
326  //**********************************************************************************************
327 
328  //**Left operand access*************************************************************************
333  inline LeftOperand leftOperand() const {
334  return mat_;
335  }
336  //**********************************************************************************************
337 
338  //**Right operand access************************************************************************
343  inline RightOperand rightOperand() const {
344  return vec_;
345  }
346  //**********************************************************************************************
347 
348  //**********************************************************************************************
354  template< typename T >
355  inline bool canAlias( const T* alias ) const {
356  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
357  }
358  //**********************************************************************************************
359 
360  //**********************************************************************************************
366  template< typename T >
367  inline bool isAliased( const T* alias ) const {
368  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
369  }
370  //**********************************************************************************************
371 
372  //**********************************************************************************************
377  inline bool isAligned() const {
378  return mat_.isAligned() && vec_.isAligned();
379  }
380  //**********************************************************************************************
381 
382  //**********************************************************************************************
// Presumably reports whether the expression can be used in SMP assignments.
// NOTE(review): the last line of the return expression is missing from this listing. The
// visible first factor holds when BLAS is not parallel, when MT is a computation that is not
// evaluated, or when rows*columns is below TDMATDVECMULT_THRESHOLD.
387  inline bool canSMPAssign() const {
388  return ( !BLAZE_BLAS_IS_PARALLEL ||
389  ( IsComputation<MT>::value && !evaluateMatrix ) ||
390  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
392  }
393  //**********************************************************************************************
394 
395  private:
396  //**Member variables****************************************************************************
// End of the unrolled calculation loop in operator[]; computed once in the constructor.
// NOTE(review): the declarations of mat_ and vec_ are not visible in this listing.
399  const size_t end_;
400  //**********************************************************************************************
401 
402  //**Assignment to dense vectors*****************************************************************
// Assigns the matrix/vector product `rhs` to the dense vector `lhs`.
// An expression with zero rows leaves `lhs` untouched; one with zero columns resets `lhs`.
// Otherwise both operands are passed through serial() into the local objects A and x and the
// actual computation is delegated to selectAssignKernel().
// NOTE(review): LT and RT are declared outside the visible part of this listing.
415  template< typename VT1 > // Type of the target dense vector
416  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
417  {
419 
420  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
421 
// Nothing to compute for an empty result; a product over zero columns is all default values.
422  if( rhs.mat_.rows() == 0UL ) {
423  return;
424  }
425  else if( rhs.mat_.columns() == 0UL ) {
426  reset( ~lhs );
427  return;
428  }
429 
430  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
431  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
432 
433  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
434  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
435  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
436  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
437 
438  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
439  }
441  //**********************************************************************************************
442 
443  //**Assignment to dense vectors (kernel selection)**********************************************
454  template< typename VT1 // Type of the left-hand side target vector
455  , typename MT1 // Type of the left-hand side matrix operand
456  , typename VT2 > // Type of the right-hand side vector operand
457  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
458  {
459  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
460  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
461  TDMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
462  else
463  TDMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
464  }
466  //**********************************************************************************************
467 
468  //**Default assignment to dense vectors*********************************************************
482  template< typename VT1 // Type of the left-hand side target vector
483  , typename MT1 // Type of the left-hand side matrix operand
484  , typename VT2 > // Type of the right-hand side vector operand
485  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
486  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
487  {
488  const size_t M( A.rows() );
489  const size_t N( A.columns() );
490 
491  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
492  const size_t iend( M & size_t(-2) );
493 
494  for( size_t i=0UL; i<M; ++i ) {
495  y[i] = x[0UL] * A(i,0UL);
496  }
497  for( size_t j=1UL; j<N; ++j ) {
498  for( size_t i=0UL; i<iend; i+=2UL ) {
499  y[i ] += x[j] * A(i ,j);
500  y[i+1UL] += x[j] * A(i+1UL,j);
501  }
502  if( iend < M ) {
503  y[iend] += x[j] * A(iend,j);
504  }
505  }
506  }
508  //**********************************************************************************************
509 
510  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for y = A * x, selected when UseVectorizedDefaultKernel holds.
// The rows are processed in tiers of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. Each tier keeps one accumulator per vector, sweeps all N columns adding
// A.load(row,j) * set( x[j] ), and then stores the accumulators into y.
// NOTE(review): the accumulators are declared without an initializer and are read right away
// (xmm = xmm + ...); this presumably relies on IntrinsicType default-constructing to zero —
// TODO confirm. The last tier loads/stores a whole vector at i even if fewer than IT::size
// rows remain, which presumably relies on padded storage of A and y — TODO confirm.
// NOTE(review): IntrinsicType is declared on a line missing from this listing.
524  template< typename VT1 // Type of the left-hand side target vector
525  , typename MT1 // Type of the left-hand side matrix operand
526  , typename VT2 > // Type of the right-hand side vector operand
527  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
528  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
529  {
530  typedef IntrinsicTrait<ElementType> IT;
531 
532  const size_t M( A.rows() );
533  const size_t N( A.columns() );
534 
535  size_t i( 0UL );
536 
// Tier 1: eight intrinsic vectors of rows per iteration
537  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
538  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
539  for( size_t j=0UL; j<N; ++j ) {
540  const IntrinsicType x1( set( x[j] ) );
541  xmm1 = xmm1 + A.load(i ,j) * x1;
542  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
543  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
544  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
545  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
546  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
547  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
548  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
549  }
550  y.store( i , xmm1 );
551  y.store( i+IT::size , xmm2 );
552  y.store( i+IT::size*2UL, xmm3 );
553  y.store( i+IT::size*3UL, xmm4 );
554  y.store( i+IT::size*4UL, xmm5 );
555  y.store( i+IT::size*5UL, xmm6 );
556  y.store( i+IT::size*6UL, xmm7 );
557  y.store( i+IT::size*7UL, xmm8 );
558  }
// Tier 2: four intrinsic vectors of rows per iteration
559  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
560  IntrinsicType xmm1, xmm2, xmm3, xmm4;
561  for( size_t j=0UL; j<N; ++j ) {
562  const IntrinsicType x1( set( x[j] ) );
563  xmm1 = xmm1 + A.load(i ,j) * x1;
564  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
565  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
566  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
567  }
568  y.store( i , xmm1 );
569  y.store( i+IT::size , xmm2 );
570  y.store( i+IT::size*2UL, xmm3 );
571  y.store( i+IT::size*3UL, xmm4 );
572  }
// Tier 3: three intrinsic vectors of rows per iteration
573  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
574  IntrinsicType xmm1, xmm2, xmm3;
575  for( size_t j=0UL; j<N; ++j ) {
576  const IntrinsicType x1( set( x[j] ) );
577  xmm1 = xmm1 + A.load(i ,j) * x1;
578  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
579  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
580  }
581  y.store( i , xmm1 );
582  y.store( i+IT::size , xmm2 );
583  y.store( i+IT::size*2UL, xmm3 );
584  }
// Tier 4: two intrinsic vectors of rows per iteration
585  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
586  IntrinsicType xmm1, xmm2;
587  for( size_t j=0UL; j<N; ++j ) {
588  const IntrinsicType x1( set( x[j] ) );
589  xmm1 = xmm1 + A.load(i ,j) * x1;
590  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
591  }
592  y.store( i , xmm1 );
593  y.store( i+IT::size, xmm2 );
594  }
// Remainder: at most one more intrinsic vector of rows is left at this point
595  if( i < M ) {
596  IntrinsicType xmm1;
597  for( size_t j=0UL; j<N; ++j ) {
598  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
599  }
600  y.store( i, xmm1 );
601  }
602  }
604  //**********************************************************************************************
605 
606  //**BLAS-based assignment to dense vectors (default)********************************************
620  template< typename VT1 // Type of the left-hand side target vector
621  , typename MT1 // Type of the left-hand side matrix operand
622  , typename VT2 > // Type of the right-hand side vector operand
623  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
624  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
625  {
626  selectDefaultAssignKernel( y, A, x );
627  }
629  //**********************************************************************************************
630 
631  //**BLAS-based assignment to dense vectors (single precision)***********************************
632 #if BLAZE_BLAS_MODE
633 
// BLAS kernel for single precision operands: y = A * x via cblas_sgemv (column-major storage,
// no transposition, alpha = 1.0F, beta = 0.0F, unit strides for x and y).
// The matrix dimensions and A.spacing() (passed as leading dimension) are converted to int
// with boost::numeric_cast.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
646  template< typename VT1 // Type of the left-hand side target vector
647  , typename MT1 // Type of the left-hand side matrix operand
648  , typename VT2 > // Type of the right-hand side vector operand
649  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
650  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
651  {
652  using boost::numeric_cast;
653 
657 
658  const int M ( numeric_cast<int>( A.rows() ) );
659  const int N ( numeric_cast<int>( A.columns() ) );
660  const int lda( numeric_cast<int>( A.spacing() ) );
661 
662  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
663  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
664  }
666 #endif
667  //**********************************************************************************************
668 
669  //**BLAS-based assignment to dense vectors (double precision)***********************************
670 #if BLAZE_BLAS_MODE
671 
// BLAS kernel for double precision operands: y = A * x via cblas_dgemv (column-major storage,
// no transposition, alpha = 1.0, beta = 0.0, unit strides for x and y).
// The matrix dimensions and A.spacing() (passed as leading dimension) are converted to int
// with boost::numeric_cast.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
684  template< typename VT1 // Type of the left-hand side target vector
685  , typename MT1 // Type of the left-hand side matrix operand
686  , typename VT2 > // Type of the right-hand side vector operand
687  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
688  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
689  {
690  using boost::numeric_cast;
691 
695 
696  const int M ( numeric_cast<int>( A.rows() ) );
697  const int N ( numeric_cast<int>( A.columns() ) );
698  const int lda( numeric_cast<int>( A.spacing() ) );
699 
700  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
701  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
702  }
704 #endif
705  //**********************************************************************************************
706 
707  //**BLAS-based assignment to dense vectors (single precision complex)***************************
708 #if BLAZE_BLAS_MODE
709 
// BLAS kernel for single precision complex operands: y = A * x via cblas_cgemv (column-major
// storage, no transposition, unit strides for x and y). alpha = (1,0) and beta = (0,0) are
// passed by address, as the complex CBLAS interface takes its scalars through pointers.
// The value_type of each element type is constrained to float.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
722  template< typename VT1 // Type of the left-hand side target vector
723  , typename MT1 // Type of the left-hand side matrix operand
724  , typename VT2 > // Type of the right-hand side vector operand
725  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
726  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
727  {
728  using boost::numeric_cast;
729 
733  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
734  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
735  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
736 
737  const int M ( numeric_cast<int>( A.rows() ) );
738  const int N ( numeric_cast<int>( A.columns() ) );
739  const int lda( numeric_cast<int>( A.spacing() ) );
740  const complex<float> alpha( 1.0F, 0.0F );
741  const complex<float> beta ( 0.0F, 0.0F );
742 
743  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
744  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
745  }
747 #endif
748  //**********************************************************************************************
749 
750  //**BLAS-based assignment to dense vectors (double precision complex)***************************
751 #if BLAZE_BLAS_MODE
752 
// BLAS kernel for double precision complex operands: y = A * x via cblas_zgemv (column-major
// storage, no transposition, unit strides for x and y). alpha = (1,0) and beta = (0,0) are
// passed by address, as the complex CBLAS interface takes its scalars through pointers.
// The value_type of each element type is constrained to double.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
765  template< typename VT1 // Type of the left-hand side target vector
766  , typename MT1 // Type of the left-hand side matrix operand
767  , typename VT2 > // Type of the right-hand side vector operand
768  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
769  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
770  {
771  using boost::numeric_cast;
772 
776  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
777  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
778  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
779 
780  const int M ( numeric_cast<int>( A.rows() ) );
781  const int N ( numeric_cast<int>( A.columns() ) );
782  const int lda( numeric_cast<int>( A.spacing() ) );
783  const complex<double> alpha( 1.0, 0.0 );
784  const complex<double> beta ( 0.0, 0.0 );
785 
786  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
787  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
788  }
790 #endif
791  //**********************************************************************************************
792 
793  //**Assignment to sparse vectors****************************************************************
// Assigns the matrix/vector product `rhs` to the sparse vector `lhs`.
// The expression is first materialized into a temporary of type ResultType (via serial()),
// which is then assigned to `lhs`.
// NOTE(review): several lines of this function are missing from this listing.
806  template< typename VT1 > // Type of the target sparse vector
807  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
808  {
810 
814 
815  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
816 
817  const ResultType tmp( serial( rhs ) );
818  assign( ~lhs, tmp );
819  }
821  //**********************************************************************************************
822 
823  //**Addition assignment to dense vectors********************************************************
// Adds the matrix/vector product `rhs` to the dense vector `lhs`.
// If the matrix has no rows or no columns there is nothing to add and `lhs` is left untouched.
// Otherwise both operands are passed through serial() into the local objects A and x and the
// actual computation is delegated to selectAddAssignKernel().
// NOTE(review): LT and RT are declared outside the visible part of this listing.
836  template< typename VT1 > // Type of the target dense vector
837  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
838  {
840 
841  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
842 
843  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
844  return;
845  }
846 
847  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
848  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
849 
850  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
851  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
852  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
853  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
854 
855  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
856  }
858  //**********************************************************************************************
859 
860  //**Addition assignment to dense vectors (kernel selection)*************************************
871  template< typename VT1 // Type of the left-hand side target vector
872  , typename MT1 // Type of the left-hand side matrix operand
873  , typename VT2 > // Type of the right-hand side vector operand
874  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
875  {
876  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
877  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
878  TDMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
879  else
880  TDMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
881  }
883  //**********************************************************************************************
884 
885  //**Default addition assignment to dense vectors************************************************
899  template< typename VT1 // Type of the left-hand side target vector
900  , typename MT1 // Type of the left-hand side matrix operand
901  , typename VT2 > // Type of the right-hand side vector operand
902  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
903  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
904  {
905  const size_t M( A.rows() );
906  const size_t N( A.columns() );
907 
908  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
909  const size_t iend( M & size_t(-2) );
910 
911  for( size_t j=0UL; j<N; ++j ) {
912  for( size_t i=0UL; i<iend; i+=2UL ) {
913  y[i ] += x[j] * A(i ,j);
914  y[i+1UL] += x[j] * A(i+1UL,j);
915  }
916  if( iend < M ) {
917  y[iend] += x[j] * A(iend,j);
918  }
919  }
920  }
922  //**********************************************************************************************
923 
924  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for y += A * x, selected when UseVectorizedDefaultKernel holds.
// The rows are processed in tiers of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. Each tier loads the current content of y into its accumulators, sweeps all N
// columns adding A.load(row,j) * set( x[j] ), and then stores the accumulators back into y.
// NOTE(review): the last tier loads/stores a whole vector at i even if fewer than IT::size
// rows remain, which presumably relies on padded storage of A and y — TODO confirm.
// NOTE(review): IntrinsicType is declared on a line missing from this listing.
938  template< typename VT1 // Type of the left-hand side target vector
939  , typename MT1 // Type of the left-hand side matrix operand
940  , typename VT2 > // Type of the right-hand side vector operand
941  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
942  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
943  {
944  typedef IntrinsicTrait<ElementType> IT;
945 
946  const size_t M( A.rows() );
947  const size_t N( A.columns() );
948 
949  size_t i( 0UL );
950 
// Tier 1: eight intrinsic vectors of rows per iteration
951  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
952  IntrinsicType xmm1( y.load(i ) );
953  IntrinsicType xmm2( y.load(i+IT::size ) );
954  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
955  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
956  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
957  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
958  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
959  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
960  for( size_t j=0UL; j<N; ++j ) {
961  const IntrinsicType x1( set( x[j] ) );
962  xmm1 = xmm1 + A.load(i ,j) * x1;
963  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
964  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
965  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
966  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
967  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
968  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
969  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
970  }
971  y.store( i , xmm1 );
972  y.store( i+IT::size , xmm2 );
973  y.store( i+IT::size*2UL, xmm3 );
974  y.store( i+IT::size*3UL, xmm4 );
975  y.store( i+IT::size*4UL, xmm5 );
976  y.store( i+IT::size*5UL, xmm6 );
977  y.store( i+IT::size*6UL, xmm7 );
978  y.store( i+IT::size*7UL, xmm8 );
979  }
// Tier 2: four intrinsic vectors of rows per iteration
980  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
981  IntrinsicType xmm1( y.load(i ) );
982  IntrinsicType xmm2( y.load(i+IT::size ) );
983  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
984  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
985  for( size_t j=0UL; j<N; ++j ) {
986  const IntrinsicType x1( set( x[j] ) );
987  xmm1 = xmm1 + A.load(i ,j) * x1;
988  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
989  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
990  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
991  }
992  y.store( i , xmm1 );
993  y.store( i+IT::size , xmm2 );
994  y.store( i+IT::size*2UL, xmm3 );
995  y.store( i+IT::size*3UL, xmm4 );
996  }
// Tier 3: three intrinsic vectors of rows per iteration
997  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
998  IntrinsicType xmm1( y.load(i ) );
999  IntrinsicType xmm2( y.load(i+IT::size ) );
1000  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1001  for( size_t j=0UL; j<N; ++j ) {
1002  const IntrinsicType x1( set( x[j] ) );
1003  xmm1 = xmm1 + A.load(i ,j) * x1;
1004  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1005  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1006  }
1007  y.store( i , xmm1 );
1008  y.store( i+IT::size , xmm2 );
1009  y.store( i+IT::size*2UL, xmm3 );
1010  }
// Tier 4: two intrinsic vectors of rows per iteration
1011  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1012  IntrinsicType xmm1( y.load(i ) );
1013  IntrinsicType xmm2( y.load(i+IT::size) );
1014  for( size_t j=0UL; j<N; ++j ) {
1015  const IntrinsicType x1( set( x[j] ) );
1016  xmm1 = xmm1 + A.load(i ,j) * x1;
1017  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1018  }
1019  y.store( i , xmm1 );
1020  y.store( i+IT::size, xmm2 );
1021  }
// Remainder: at most one more intrinsic vector of rows is left at this point
1022  if( i < M ) {
1023  IntrinsicType xmm1( y.load(i) );
1024  for( size_t j=0UL; j<N; ++j ) {
1025  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1026  }
1027  y.store( i, xmm1 );
1028  }
1029  }
1031  //**********************************************************************************************
1032 
1033  //**BLAS-based addition assignment to dense vectors (default)***********************************
1047  template< typename VT1 // Type of the left-hand side target vector
1048  , typename MT1 // Type of the left-hand side matrix operand
1049  , typename VT2 > // Type of the right-hand side vector operand
1050  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1051  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1052  {
1053  selectDefaultAddAssignKernel( y, A, x );
1054  }
1056  //**********************************************************************************************
1057 
1058  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1059 #if BLAZE_BLAS_MODE
1060 
// BLAS kernel for single precision operands: y += A * x via cblas_sgemv (column-major storage,
// no transposition, alpha = 1.0F, unit strides for x and y). beta = 1.0F keeps the previous
// content of y, which turns the call into an accumulation.
// The matrix dimensions and A.spacing() (passed as leading dimension) are converted to int
// with boost::numeric_cast.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
1073  template< typename VT1 // Type of the left-hand side target vector
1074  , typename MT1 // Type of the left-hand side matrix operand
1075  , typename VT2 > // Type of the right-hand side vector operand
1076  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1077  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1078  {
1079  using boost::numeric_cast;
1080 
1084 
1085  const int M ( numeric_cast<int>( A.rows() ) );
1086  const int N ( numeric_cast<int>( A.columns() ) );
1087  const int lda( numeric_cast<int>( A.spacing() ) );
1088 
1089  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, 1.0F,
1090  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1091  }
1093 #endif
1094  //**********************************************************************************************
1095 
1096  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1097 #if BLAZE_BLAS_MODE
1098 
// BLAS kernel for double precision operands: y += A * x via cblas_dgemv (column-major storage,
// no transposition, alpha = 1.0, unit strides for x and y). beta = 1.0 keeps the previous
// content of y, which turns the call into an accumulation.
// The matrix dimensions and A.spacing() (passed as leading dimension) are converted to int
// with boost::numeric_cast.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
1111  template< typename VT1 // Type of the left-hand side target vector
1112  , typename MT1 // Type of the left-hand side matrix operand
1113  , typename VT2 > // Type of the right-hand side vector operand
1114  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1115  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1116  {
1117  using boost::numeric_cast;
1118 
1122 
1123  const int M ( numeric_cast<int>( A.rows() ) );
1124  const int N ( numeric_cast<int>( A.columns() ) );
1125  const int lda( numeric_cast<int>( A.spacing() ) );
1126 
1127  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, 1.0,
1128  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1129  }
1131 #endif
1132  //**********************************************************************************************
1133 
1134  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1135 #if BLAZE_BLAS_MODE
1136 
// BLAS kernel for single precision complex operands: y += A * x via cblas_cgemv (column-major
// storage, no transposition, unit strides for x and y). alpha = (1,0) and beta = (1,0) are
// passed by address; the unit beta keeps the previous content of y (accumulation).
// The value_type of each element type is constrained to float.
// NOTE(review): numeric_cast presumably throws if a value does not fit into int — confirm.
1149  template< typename VT1 // Type of the left-hand side target vector
1150  , typename MT1 // Type of the left-hand side matrix operand
1151  , typename VT2 > // Type of the right-hand side vector operand
1152  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1153  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1154  {
1155  using boost::numeric_cast;
1156 
1160  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1161  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1162  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1163 
1164  const int M ( numeric_cast<int>( A.rows() ) );
1165  const int N ( numeric_cast<int>( A.columns() ) );
1166  const int lda( numeric_cast<int>( A.spacing() ) );
1167  const complex<float> alpha( 1.0F, 0.0F );
1168  const complex<float> beta ( 1.0F, 0.0F );
1169 
1170  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1171  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1172  }
1174 #endif
1175  //**********************************************************************************************
1176 
1177  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1178 #if BLAZE_BLAS_MODE
1179 
1192  template< typename VT1 // Type of the left-hand side target vector
1193  , typename MT1 // Type of the left-hand side matrix operand
1194  , typename VT2 > // Type of the right-hand side vector operand
1195  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1196  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1197  {
1198  using boost::numeric_cast;
1199 
1203  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1204  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1205  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1206 
1207  const int M ( numeric_cast<int>( A.rows() ) );
1208  const int N ( numeric_cast<int>( A.columns() ) );
1209  const int lda( numeric_cast<int>( A.spacing() ) );
1210  const complex<double> alpha( 1.0, 0.0 );
1211  const complex<double> beta ( 1.0, 0.0 );
1212 
1213  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1214  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1215  }
1217 #endif
1218  //**********************************************************************************************
1219 
1220  //**Addition assignment to sparse vectors*******************************************************
1221  // No special implementation for the addition assignment to sparse vectors.
1222  //**********************************************************************************************
1223 
1224  //**Subtraction assignment to dense vectors*****************************************************
1237  template< typename VT1 > // Type of the target dense vector
1238  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1239  {
1241 
1242  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1243 
1244  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1245  return;
1246  }
1247 
1248  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1249  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1250 
1251  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1252  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1253  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1254  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1255 
1256  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1257  }
1259  //**********************************************************************************************
1260 
1261  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1272  template< typename VT1 // Type of the left-hand side target vector
1273  , typename MT1 // Type of the left-hand side matrix operand
1274  , typename VT2 > // Type of the right-hand side vector operand
1275  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1276  {
1277  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1278  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1279  TDMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1280  else
1281  TDMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
1282  }
1284  //**********************************************************************************************
1285 
1286  //**Default subtraction assignment to dense vectors*********************************************
1300  template< typename VT1 // Type of the left-hand side target vector
1301  , typename MT1 // Type of the left-hand side matrix operand
1302  , typename VT2 > // Type of the right-hand side vector operand
1303  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1304  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1305  {
1306  const size_t M( A.rows() );
1307  const size_t N( A.columns() );
1308 
1309  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
1310  const size_t iend( M & size_t(-2) );
1311 
1312  for( size_t j=0UL; j<N; ++j ) {
1313  for( size_t i=0UL; i<iend; i+=2UL ) {
1314  y[i ] -= x[j] * A(i ,j);
1315  y[i+1UL] -= x[j] * A(i+1UL,j);
1316  }
1317  if( iend < M ) {
1318  y[iend] -= x[j] * A(iend,j);
1319  }
1320  }
1321  }
1323  //**********************************************************************************************
1324 
1325  //**Vectorized default subtraction assignment to dense vectors**********************************
1339  template< typename VT1 // Type of the left-hand side target vector
1340  , typename MT1 // Type of the left-hand side matrix operand
1341  , typename VT2 > // Type of the right-hand side vector operand
1342  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1343  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1344  {
1345  typedef IntrinsicTrait<ElementType> IT;
1346 
1347  const size_t M( A.rows() );
1348  const size_t N( A.columns() );
1349 
1350  size_t i( 0UL );
1351 
1352  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1353  IntrinsicType xmm1( y.load(i ) );
1354  IntrinsicType xmm2( y.load(i+IT::size ) );
1355  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1356  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1357  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1358  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1359  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1360  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1361  for( size_t j=0UL; j<N; ++j ) {
1362  const IntrinsicType x1( set( x[j] ) );
1363  xmm1 = xmm1 - A.load(i ,j) * x1;
1364  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1365  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1366  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1367  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1368  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1369  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1370  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1371  }
1372  y.store( i , xmm1 );
1373  y.store( i+IT::size , xmm2 );
1374  y.store( i+IT::size*2UL, xmm3 );
1375  y.store( i+IT::size*3UL, xmm4 );
1376  y.store( i+IT::size*4UL, xmm5 );
1377  y.store( i+IT::size*5UL, xmm6 );
1378  y.store( i+IT::size*6UL, xmm7 );
1379  y.store( i+IT::size*7UL, xmm8 );
1380  }
1381  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1382  IntrinsicType xmm1( y.load(i ) );
1383  IntrinsicType xmm2( y.load(i+IT::size ) );
1384  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1385  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1386  for( size_t j=0UL; j<N; ++j ) {
1387  const IntrinsicType x1( set( x[j] ) );
1388  xmm1 = xmm1 - A.load(i ,j) * x1;
1389  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1390  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1391  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1392  }
1393  y.store( i , xmm1 );
1394  y.store( i+IT::size , xmm2 );
1395  y.store( i+IT::size*2UL, xmm3 );
1396  y.store( i+IT::size*3UL, xmm4 );
1397  }
1398  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
1399  IntrinsicType xmm1( y.load(i ) );
1400  IntrinsicType xmm2( y.load(i+IT::size ) );
1401  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1402  for( size_t j=0UL; j<N; ++j ) {
1403  const IntrinsicType x1( set( x[j] ) );
1404  xmm1 = xmm1 - A.load(i ,j) * x1;
1405  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1406  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1407  }
1408  y.store( i , xmm1 );
1409  y.store( i+IT::size , xmm2 );
1410  y.store( i+IT::size*2UL, xmm3 );
1411  }
1412  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1413  IntrinsicType xmm1( y.load(i ) );
1414  IntrinsicType xmm2( y.load(i+IT::size) );
1415  for( size_t j=0UL; j<N; ++j ) {
1416  const IntrinsicType x1( set( x[j] ) );
1417  xmm1 = xmm1 - A.load(i ,j) * x1;
1418  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
1419  }
1420  y.store( i , xmm1 );
1421  y.store( i+IT::size, xmm2 );
1422  }
1423  if( i < M ) {
1424  IntrinsicType xmm1( y.load(i) );
1425  for( size_t j=0UL; j<N; ++j ) {
1426  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
1427  }
1428  y.store( i, xmm1 );
1429  }
1430  }
1432  //**********************************************************************************************
1433 
1434  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1448  template< typename VT1 // Type of the left-hand side target vector
1449  , typename MT1 // Type of the left-hand side matrix operand
1450  , typename VT2 > // Type of the right-hand side vector operand
1451  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1452  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1453  {
1454  selectDefaultSubAssignKernel( y, A, x );
1455  }
1457  //**********************************************************************************************
1458 
1459  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1460 #if BLAZE_BLAS_MODE
1461 
1474  template< typename VT1 // Type of the left-hand side target vector
1475  , typename MT1 // Type of the left-hand side matrix operand
1476  , typename VT2 > // Type of the right-hand side vector operand
1477  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1478  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1479  {
1480  using boost::numeric_cast;
1481 
1485 
1486  const int M ( numeric_cast<int>( A.rows() ) );
1487  const int N ( numeric_cast<int>( A.columns() ) );
1488  const int lda( numeric_cast<int>( A.spacing() ) );
1489 
1490  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -1.0F,
1491  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1492  }
1494 #endif
1495  //**********************************************************************************************
1496 
1497  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1498 #if BLAZE_BLAS_MODE
1499 
1512  template< typename VT1 // Type of the left-hand side target vector
1513  , typename MT1 // Type of the left-hand side matrix operand
1514  , typename VT2 > // Type of the right-hand side vector operand
1515  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1516  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1517  {
1518  using boost::numeric_cast;
1519 
1523 
1524  const int M ( numeric_cast<int>( A.rows() ) );
1525  const int N ( numeric_cast<int>( A.columns() ) );
1526  const int lda( numeric_cast<int>( A.spacing() ) );
1527 
1528  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -1.0,
1529  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1530  }
1532 #endif
1533  //**********************************************************************************************
1534 
1535  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1536 #if BLAZE_BLAS_MODE
1537 
1550  template< typename VT1 // Type of the left-hand side target vector
1551  , typename MT1 // Type of the left-hand side matrix operand
1552  , typename VT2 > // Type of the right-hand side vector operand
1553  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1554  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1555  {
1556  using boost::numeric_cast;
1557 
1561  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1562  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1563  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1564 
1565  const int M ( numeric_cast<int>( A.rows() ) );
1566  const int N ( numeric_cast<int>( A.columns() ) );
1567  const int lda( numeric_cast<int>( A.spacing() ) );
1568  const complex<float> alpha( -1.0F, 0.0F );
1569  const complex<float> beta ( 1.0F, 0.0F );
1570 
1571  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1572  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1573  }
1575 #endif
1576  //**********************************************************************************************
1577 
1578  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1579 #if BLAZE_BLAS_MODE
1580 
1593  template< typename VT1 // Type of the left-hand side target vector
1594  , typename MT1 // Type of the left-hand side matrix operand
1595  , typename VT2 > // Type of the right-hand side vector operand
1596  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1597  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1598  {
1599  using boost::numeric_cast;
1600 
1604  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1605  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1606  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1607 
1608  const int M ( numeric_cast<int>( A.rows() ) );
1609  const int N ( numeric_cast<int>( A.columns() ) );
1610  const int lda( numeric_cast<int>( A.spacing() ) );
1611  const complex<double> alpha( -1.0, 0.0 );
1612  const complex<double> beta ( 1.0, 0.0 );
1613 
1614  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
1615  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1616  }
1618 #endif
1619  //**********************************************************************************************
1620 
1621  //**Subtraction assignment to sparse vectors****************************************************
1622  // No special implementation for the subtraction assignment to sparse vectors.
1623  //**********************************************************************************************
1624 
1625  //**Multiplication assignment to dense vectors**************************************************
1638  template< typename VT1 > // Type of the target dense vector
1639  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1640  {
1642 
1646 
1647  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1648 
1649  const ResultType tmp( serial( rhs ) );
1650  multAssign( ~lhs, tmp );
1651  }
1653  //**********************************************************************************************
1654 
1655  //**Multiplication assignment to sparse vectors*************************************************
1656  // No special implementation for the multiplication assignment to sparse vectors.
1657  //**********************************************************************************************
1658 
1659  //**SMP assignment to dense vectors*************************************************************
1674  template< typename VT1 > // Type of the target dense vector
1675  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1676  smpAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1677  {
1679 
1680  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1681 
1682  if( rhs.mat_.rows() == 0UL ) {
1683  return;
1684  }
1685  else if( rhs.mat_.columns() == 0UL ) {
1686  reset( ~lhs );
1687  return;
1688  }
1689 
1690  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1691  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1692 
1693  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1694  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1695  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1696  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1697 
1698  smpAssign( ~lhs, A * x );
1699  }
1701  //**********************************************************************************************
1702 
1703  //**SMP assignment to sparse vectors************************************************************
1718  template< typename VT1 > // Type of the target sparse vector
1719  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1720  smpAssign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1721  {
1723 
1727 
1728  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1729 
1730  const ResultType tmp( rhs );
1731  smpAssign( ~lhs, tmp );
1732  }
1734  //**********************************************************************************************
1735 
1736  //**SMP addition assignment to dense vectors****************************************************
1751  template< typename VT1 > // Type of the target dense vector
1752  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1753  smpAddAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1754  {
1756 
1757  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1758 
1759  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1760  return;
1761  }
1762 
1763  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1764  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1765 
1766  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1767  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1768  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1769  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1770 
1771  smpAddAssign( ~lhs, A * x );
1772  }
1774  //**********************************************************************************************
1775 
1776  //**SMP addition assignment to sparse vectors***************************************************
1777  // No special implementation for the SMP addition assignment to sparse vectors.
1778  //**********************************************************************************************
1779 
1780  //**SMP subtraction assignment to dense vectors*************************************************
1795  template< typename VT1 > // Type of the target dense vector
1796  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1797  smpSubAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1798  {
1800 
1801  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1802 
1803  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1804  return;
1805  }
1806 
1807  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
1808  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
1809 
1810  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1811  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1812  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1813  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1814 
1815  smpSubAssign( ~lhs, A * x );
1816  }
1818  //**********************************************************************************************
1819 
1820  //**SMP subtraction assignment to sparse vectors************************************************
1821  // No special implementation for the SMP subtraction assignment to sparse vectors.
1822  //**********************************************************************************************
1823 
1824  //**SMP multiplication assignment to dense vectors**********************************************
1839  template< typename VT1 > // Type of the target dense vector
1840  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1841  smpMultAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1842  {
1844 
1848 
1849  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1850 
1851  const ResultType tmp( rhs );
1852  smpMultAssign( ~lhs, tmp );
1853  }
1855  //**********************************************************************************************
1856 
1857  //**SMP multiplication assignment to sparse vectors*********************************************
1858  // No special implementation for the SMP multiplication assignment to sparse vectors.
1859  //**********************************************************************************************
1860 
1861  //**Compile time checks*************************************************************************
1868  //**********************************************************************************************
1869 };
1870 //*************************************************************************************************
1871 
1872 
1873 
1874 
1875 //=================================================================================================
1876 //
1877 // DVECSCALARMULTEXPR SPECIALIZATION
1878 //
1879 //=================================================================================================
1880 
1881 //*************************************************************************************************
1890 template< typename MT // Type of the left-hand side dense matrix
1891  , typename VT // Type of the right-hand side dense vector
1892  , typename ST > // Type of the side scalar value
1893 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
1894  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1895  , private VecScalarMultExpr
1896  , private Computation
1897 {
1898  private:
1899  //**Type definitions****************************************************************************
// Private shorthand types used throughout this specialization.
1900  typedef TDMatDVecMultExpr<MT,VT> MVM;  //!< Type of the wrapped matrix/vector multiplication expression.
1901  typedef typename MVM::ResultType RES;  //!< Result type of the matrix/vector multiplication expression.
1902  typedef typename MT::ResultType MRT;  //!< Result type of the matrix operand type MT.
1903  typedef typename VT::ResultType VRT;  //!< Result type of the vector operand type VT.
1904  typedef typename MRT::ElementType MET;  //!< Element type of the matrix result type.
1905  typedef typename VRT::ElementType VET;  //!< Element type of the vector result type.
1906  typedef typename MT::CompositeType MCT;  //!< Composite type of the matrix operand type MT.
1907  typedef typename VT::CompositeType VCT;  //!< Composite type of the vector operand type VT.
1908  //**********************************************************************************************
1909 
1910  //**********************************************************************************************
// Compile-time flag selecting the type LT used for the matrix operand (see the SelectType
// typedef of LT): non-zero if MT is a computation whose element type equals the vector element
// type and is BLAS compatible, or if MT requires an evaluation.
1912  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1913  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
1914  //**********************************************************************************************
1915 
1916  //**********************************************************************************************
// Compile-time flag selecting the type RT used for the vector operand (see the SelectType
// typedef of RT): non-zero if VT is a computation or requires an evaluation.
1918  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1919  //**********************************************************************************************
1920 
1921  //**********************************************************************************************
1923 
1926  template< typename T1 >
1927  struct UseSMPAssign {
1928  enum { value = ( evaluateMatrix || evaluateVector ) };
1929  };
1930  //**********************************************************************************************
1931 
1932  //**********************************************************************************************
1934 
1937  template< typename T1, typename T2, typename T3, typename T4 >
1938  struct UseSinglePrecisionKernel {
1939  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1940  IsFloat<typename T1::ElementType>::value &&
1941  IsFloat<typename T2::ElementType>::value &&
1942  IsFloat<typename T3::ElementType>::value &&
1943  !IsComplex<T4>::value };
1944  };
1945  //**********************************************************************************************
1946 
1947  //**********************************************************************************************
1949 
1952  template< typename T1, typename T2, typename T3, typename T4 >
1953  struct UseDoublePrecisionKernel {
1954  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1955  IsDouble<typename T1::ElementType>::value &&
1956  IsDouble<typename T2::ElementType>::value &&
1957  IsDouble<typename T3::ElementType>::value &&
1958  !IsComplex<T4>::value };
1959  };
1960  //**********************************************************************************************
1961 
1962  //**********************************************************************************************
1964 
1967  template< typename T1, typename T2, typename T3 >
1968  struct UseSinglePrecisionComplexKernel {
1969  typedef complex<float> Type;
1970  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1971  IsSame<typename T1::ElementType,Type>::value &&
1972  IsSame<typename T2::ElementType,Type>::value &&
1973  IsSame<typename T3::ElementType,Type>::value };
1974  };
1975  //**********************************************************************************************
1976 
1977  //**********************************************************************************************
1979 
1982  template< typename T1, typename T2, typename T3 >
1983  struct UseDoublePrecisionComplexKernel {
1984  typedef complex<double> Type;
1985  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1986  IsSame<typename T1::ElementType,Type>::value &&
1987  IsSame<typename T2::ElementType,Type>::value &&
1988  IsSame<typename T3::ElementType,Type>::value };
1989  };
1990  //**********************************************************************************************
1991 
1992  //**********************************************************************************************
1994 
1996  template< typename T1, typename T2, typename T3, typename T4 >
1997  struct UseDefaultKernel {
1998  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1999  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2000  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2001  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2002  };
2003  //**********************************************************************************************
2004 
2005  //**********************************************************************************************
2007 
2010  template< typename T1, typename T2, typename T3, typename T4 >
2011  struct UseVectorizedDefaultKernel {
2012  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2013  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2014  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2015  IsSame<typename T1::ElementType,T4>::value &&
2016  IntrinsicTrait<typename T1::ElementType>::addition &&
2017  IntrinsicTrait<typename T1::ElementType>::multiplication };
2018  };
2019  //**********************************************************************************************
2020 
2021  public:
2022  //**Type definitions****************************************************************************
// Public type definitions of the scaled matrix/vector multiplication expression.
2023  typedef DVecScalarMultExpr<MVM,ST,false> This;  //!< Type of this DVecScalarMultExpr instance.
2024  typedef typename MultTrait<RES,ST>::Type ResultType;  //!< Result type obtained from MultTrait for RES and ST.
2025  typedef typename ResultType::TransposeType TransposeType;  //!< Transpose type of the result type.
2026  typedef typename ResultType::ElementType ElementType;  //!< Element type of the result type.
2027  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  //!< Intrinsic type associated with ElementType.
2028  typedef const ElementType ReturnType;  //!< Return type of the subscript operator.
2029  typedef const ResultType CompositeType;  //!< Composite type: the expression is represented by its (const) result type.
2030 
// Type of the left-hand side operand (the wrapped matrix/vector multiplication expression).
2032  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand;
2033 
// Type of the right-hand side operand (the scalar).
2035  typedef ST RightOperand;
2036 
// Type used for the matrix operand: const MRT if evaluateMatrix is set, MCT otherwise.
2038  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
2039 
// Type used for the vector operand: const VRT if evaluateVector is set, VCT otherwise.
2041  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
2042  //**********************************************************************************************
2043 
2044  //**Compilation flags***************************************************************************
// Compilation flag for vectorization: set if both operand types are vectorizable, the matrix
// element type equals both the vector element type and the scalar type, and IntrinsicTrait
// reports addition and multiplication for the matrix element type.
2046  enum { vectorizable = MT::vectorizable && VT::vectorizable &&
2047  IsSame<MET,VET>::value &&
2048  IsSame<MET,ST>::value &&
2049  IntrinsicTrait<MET>::addition &&
2050  IntrinsicTrait<MET>::multiplication };
2051 
// Compilation flag for SMP assignments: set only if neither operand is flagged for evaluation
// and both operand types are themselves SMP-assignable.
2053  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2054  !evaluateVector && VT::smpAssignable };
2055  //**********************************************************************************************
2056 
2057  //**Constructor*********************************************************************************
2063  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2064  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2065  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2066  {}
2067  //**********************************************************************************************
2068 
2069  //**Subscript operator**************************************************************************
2075  inline ReturnType operator[]( size_t index ) const {
2076  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2077  return vector_[index] * scalar_;
2078  }
2079  //**********************************************************************************************
2080 
2081  //**Size function*******************************************************************************
2086  inline size_t size() const {
2087  return vector_.size();
2088  }
2089  //**********************************************************************************************
2090 
2091  //**Left operand access*************************************************************************
2096  inline LeftOperand leftOperand() const {
2097  return vector_;
2098  }
2099  //**********************************************************************************************
2100 
2101  //**Right operand access************************************************************************
2106  inline RightOperand rightOperand() const {
2107  return scalar_;
2108  }
2109  //**********************************************************************************************
2110 
2111  //**********************************************************************************************
2117  template< typename T >
2118  inline bool canAlias( const T* alias ) const {
2119  return vector_.canAlias( alias );
2120  }
2121  //**********************************************************************************************
2122 
2123  //**********************************************************************************************
2129  template< typename T >
2130  inline bool isAliased( const T* alias ) const {
2131  return vector_.isAliased( alias );
2132  }
2133  //**********************************************************************************************
2134 
2135  //**********************************************************************************************
2140  inline bool isAligned() const {
2141  return vector_.isAligned();
2142  }
2143  //**********************************************************************************************
2144 
2145  //**********************************************************************************************
2150  inline bool canSMPAssign() const {
2151  typename MVM::LeftOperand A( vector_.leftOperand() );
2152  return ( !BLAZE_BLAS_IS_PARALLEL ||
2153  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2154  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2156  }
2157  //**********************************************************************************************
2158 
2159  private:
2160  //**Member variables****************************************************************************
2161  LeftOperand vector_; //!< Left-hand side operand: the wrapped vector expression (returned by leftOperand()).
2162  RightOperand scalar_; //!< Right-hand side operand: the scalar every element is multiplied with (returned by rightOperand()).
2163  //**********************************************************************************************
2164 
2165  //**Assignment to dense vectors*****************************************************************
2177  template< typename VT1 > // Type of the target dense vector
2178  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2179  {
2181 
2182  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2183 
2184  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2185  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2186 
2187  if( left.rows() == 0UL ) {
2188  return;
2189  }
2190  else if( left.columns() == 0UL ) {
2191  reset( ~lhs );
2192  return;
2193  }
2194 
2195  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2196  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2197 
2198  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2199  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2200  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2201  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2202 
2203  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2204  }
2205  //**********************************************************************************************
2206 
2207  //**Assignment to dense vectors (kernel selection)**********************************************
2218  template< typename VT1 // Type of the left-hand side target vector
2219  , typename MT1 // Type of the left-hand side matrix operand
2220  , typename VT2 // Type of the right-hand side vector operand
2221  , typename ST2 > // Type of the scalar value
2222  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2223  {
2224  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2225  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2226  DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2227  else
2228  DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
2229  }
2230  //**********************************************************************************************
2231 
2232  //**Default assignment to dense vectors*********************************************************
2246  template< typename VT1 // Type of the left-hand side target vector
2247  , typename MT1 // Type of the left-hand side matrix operand
2248  , typename VT2 // Type of the right-hand side vector operand
2249  , typename ST2 > // Type of the scalar value
2250  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2251  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2252  {
2253  const size_t M( A.rows() );
2254  const size_t N( A.columns() );
2255 
2256  BLAZE_INTERNAL_ASSERT( ( M - ( M % 2UL ) ) == ( M & size_t(-2) ), "Invalid end calculation" );
2257  const size_t iend( M & size_t(-2) );
2258 
2259  for( size_t i=0UL; i<M; ++i ) {
2260  y[i] = x[0UL] * A(i,0UL);
2261  }
2262  for( size_t j=1UL; j<N; ++j ) {
2263  for( size_t i=0UL; i<iend; i+=2UL ) {
2264  y[i ] += x[j] * A(i ,j);
2265  y[i+1UL] += x[j] * A(i+1UL,j);
2266  }
2267  if( iend < M ) {
2268  y[iend] += x[j] * A(iend,j);
2269  }
2270  }
2271  for( size_t i=0UL; i<M; ++i ) {
2272  y[i] *= scalar;
2273  }
2274  }
2275  //**********************************************************************************************
2276 
2277  //**Vectorized default assignment to dense vectors**********************************************
// Vectorized default kernel for the assignment of the scaled product: for each group of rows the
// terms A.load(row,j) * x[j] are accumulated over all columns j, and the accumulated vector
// multiplied by the scalar is stored into y. Enabled (via EnableIf) when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
//   y      : target vector (written via store())
//   A      : matrix operand (read via load())
//   x      : vector operand
//   scalar : scaling factor
2291  template< typename VT1 // Type of the left-hand side target vector
2292  , typename MT1 // Type of the left-hand side matrix operand
2293  , typename VT2 // Type of the right-hand side vector operand
2294  , typename ST2 > // Type of the scalar value
2295  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2296  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2297  {
2298  typedef IntrinsicTrait<ElementType> IT;
2299 
2300  const size_t M( A.rows() );
2301  const size_t N( A.columns() );
2302 
      // Scalar converted to an intrinsic vector once (set() presumably broadcasts it to all lanes)
2303  const IntrinsicType factor( set( scalar ) );
2304 
2305  size_t i( 0UL );
2306 
      // Blocks of 8 intrinsic vectors (8*IT::size rows) per iteration.
      // NOTE(review): the accumulators are default-constructed and then added to; this assumes
      // IntrinsicType's default constructor yields a zero vector - confirm.
2307  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2308  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2309  for( size_t j=0UL; j<N; ++j ) {
2310  const IntrinsicType x1( set( x[j] ) );
2311  xmm1 = xmm1 + A.load(i ,j) * x1;
2312  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2313  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2314  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2315  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2316  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2317  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2318  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2319  }
2320  y.store( i , xmm1*factor );
2321  y.store( i+IT::size , xmm2*factor );
2322  y.store( i+IT::size*2UL, xmm3*factor );
2323  y.store( i+IT::size*3UL, xmm4*factor );
2324  y.store( i+IT::size*4UL, xmm5*factor );
2325  y.store( i+IT::size*5UL, xmm6*factor );
2326  y.store( i+IT::size*6UL, xmm7*factor );
2327  y.store( i+IT::size*7UL, xmm8*factor );
2328  }
      // Blocks of 4 intrinsic vectors
2329  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2330  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2331  for( size_t j=0UL; j<N; ++j ) {
2332  const IntrinsicType x1( set( x[j] ) );
2333  xmm1 = xmm1 + A.load(i ,j) * x1;
2334  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2335  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2336  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2337  }
2338  y.store( i , xmm1*factor );
2339  y.store( i+IT::size , xmm2*factor );
2340  y.store( i+IT::size*2UL, xmm3*factor );
2341  y.store( i+IT::size*3UL, xmm4*factor );
2342  }
      // Blocks of 3 intrinsic vectors
2343  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2344  IntrinsicType xmm1, xmm2, xmm3;
2345  for( size_t j=0UL; j<N; ++j ) {
2346  const IntrinsicType x1( set( x[j] ) );
2347  xmm1 = xmm1 + A.load(i ,j) * x1;
2348  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2349  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2350  }
2351  y.store( i , xmm1*factor );
2352  y.store( i+IT::size , xmm2*factor );
2353  y.store( i+IT::size*2UL, xmm3*factor );
2354  }
      // Blocks of 2 intrinsic vectors
2355  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2356  IntrinsicType xmm1, xmm2;
2357  for( size_t j=0UL; j<N; ++j ) {
2358  const IntrinsicType x1( set( x[j] ) );
2359  xmm1 = xmm1 + A.load(i ,j) * x1;
2360  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2361  }
2362  y.store( i , xmm1*factor );
2363  y.store( i+IT::size, xmm2*factor );
2364  }
      // Final single intrinsic vector. A full vector is loaded and stored even if fewer than
      // IT::size rows remain - presumably the operands are padded accordingly; TODO confirm.
2365  if( i < M ) {
2366  IntrinsicType xmm1;
2367  for( size_t j=0UL; j<N; ++j ) {
2368  const IntrinsicType x1( set( x[j] ) );
2369  xmm1 = xmm1 + A.load(i,j) * x1;
2370  }
2371  y.store( i, xmm1*factor );
2372  }
2373  }
2374  //**********************************************************************************************
2375 
2376  //**BLAS-based assignment to dense vectors (default)********************************************
2390  template< typename VT1 // Type of the left-hand side target vector
2391  , typename MT1 // Type of the left-hand side matrix operand
2392  , typename VT2 // Type of the right-hand side vector operand
2393  , typename ST2 > // Type of the scalar value
2394  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2395  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2396  {
2397  selectDefaultAssignKernel( y, A, x, scalar );
2398  }
2399  //**********************************************************************************************
2400 
2401  //**BLAS-based assignment to dense vectors (single precision)***********************************
2402 #if BLAZE_BLAS_MODE
2403 
2416  template< typename VT1 // Type of the left-hand side target vector
2417  , typename MT1 // Type of the left-hand side matrix operand
2418  , typename VT2 // Type of the right-hand side vector operand
2419  , typename ST2 > // Type of the scalar value
2420  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2421  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2422  {
2423  using boost::numeric_cast;
2424 
2428 
2429  const int M ( numeric_cast<int>( A.rows() ) );
2430  const int N ( numeric_cast<int>( A.columns() ) );
2431  const int lda( numeric_cast<int>( A.spacing() ) );
2432 
2433  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2434  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2435  }
2436 #endif
2437  //**********************************************************************************************
2438 
2439  //**BLAS-based assignment to dense vectors (double precision)***********************************
2440 #if BLAZE_BLAS_MODE
2441 
2454  template< typename VT1 // Type of the left-hand side target vector
2455  , typename MT1 // Type of the left-hand side matrix operand
2456  , typename VT2 // Type of the right-hand side vector operand
2457  , typename ST2 > // Type of the scalar value
2458  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2459  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2460  {
2461  using boost::numeric_cast;
2462 
2466 
2467  const int M ( numeric_cast<int>( A.rows() ) );
2468  const int N ( numeric_cast<int>( A.columns() ) );
2469  const int lda( numeric_cast<int>( A.spacing() ) );
2470 
2471  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2472  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2473  }
2474 #endif
2475  //**********************************************************************************************
2476 
2477  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2478 #if BLAZE_BLAS_MODE
2479 
2492  template< typename VT1 // Type of the left-hand side target vector
2493  , typename MT1 // Type of the left-hand side matrix operand
2494  , typename VT2 // Type of the right-hand side vector operand
2495  , typename ST2 > // Type of the scalar value
2496  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2497  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2498  {
2499  using boost::numeric_cast;
2500 
2504  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2505  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2506  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2507 
2508  const int M ( numeric_cast<int>( A.rows() ) );
2509  const int N ( numeric_cast<int>( A.columns() ) );
2510  const int lda( numeric_cast<int>( A.spacing() ) );
2511  const complex<float> alpha( scalar );
2512  const complex<float> beta ( 0.0F, 0.0F );
2513 
2514  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2515  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2516  }
2517 #endif
2518  //**********************************************************************************************
2519 
2520  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2521 #if BLAZE_BLAS_MODE
2522 
2535  template< typename VT1 // Type of the left-hand side target vector
2536  , typename MT1 // Type of the left-hand side matrix operand
2537  , typename VT2 // Type of the right-hand side vector operand
2538  , typename ST2 > // Type of the scalar value
2539  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2540  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2541  {
2542  using boost::numeric_cast;
2543 
2547  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2548  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2549  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2550 
2551  const int M ( numeric_cast<int>( A.rows() ) );
2552  const int N ( numeric_cast<int>( A.columns() ) );
2553  const int lda( numeric_cast<int>( A.spacing() ) );
2554  const complex<double> alpha( scalar );
2555  const complex<double> beta ( 0.0, 0.0 );
2556 
2557  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2558  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2559  }
2560 #endif
2561  //**********************************************************************************************
2562 
2563  //**Assignment to sparse vectors****************************************************************
2575  template< typename VT1 > // Type of the target sparse vector
2576  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2577  {
2579 
2583 
2584  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2585 
2586  const ResultType tmp( serial( rhs ) );
2587  assign( ~lhs, tmp );
2588  }
2589  //**********************************************************************************************
2590 
2591  //**Addition assignment to dense vectors********************************************************
2603  template< typename VT1 > // Type of the target dense vector
2604  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2605  {
2607 
2608  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2609 
2610  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2611  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2612 
2613  if( left.rows() == 0UL || left.columns() == 0UL ) {
2614  return;
2615  }
2616 
2617  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2618  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2619 
2620  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2621  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2622  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2623  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2624 
2625  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2626  }
2627  //**********************************************************************************************
2628 
2629  //**Addition assignment to dense vectors (kernel selection)*************************************
2640  template< typename VT1 // Type of the left-hand side target vector
2641  , typename MT1 // Type of the left-hand side matrix operand
2642  , typename VT2 // Type of the right-hand side vector operand
2643  , typename ST2 > // Type of the scalar value
2644  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2645  {
2646  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2647  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2648  DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2649  else
2650  DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
2651  }
2652  //**********************************************************************************************
2653 
2654  //**Default addition assignment to dense vectors************************************************
2668  template< typename VT1 // Type of the left-hand side target vector
2669  , typename MT1 // Type of the left-hand side matrix operand
2670  , typename VT2 // Type of the right-hand side vector operand
2671  , typename ST2 > // Type of the scalar value
2672  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2673  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2674  {
2675  y.addAssign( A * x * scalar );
2676  }
2677  //**********************************************************************************************
2678 
2679  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default kernel for the addition assignment of the scaled product: for each group of
// rows the terms A.load(row,j) * x[j] are accumulated over all columns j, and the accumulated
// vector multiplied by the scalar is added to the current content of y (y.load() + ...).
// Enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
//   y      : target vector (read via load(), written via store())
//   A      : matrix operand (read via load())
//   x      : vector operand
//   scalar : scaling factor
2693  template< typename VT1 // Type of the left-hand side target vector
2694  , typename MT1 // Type of the left-hand side matrix operand
2695  , typename VT2 // Type of the right-hand side vector operand
2696  , typename ST2 > // Type of the scalar value
2697  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2698  selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2699  {
2700  typedef IntrinsicTrait<ElementType> IT;
2701 
2702  const size_t M( A.rows() );
2703  const size_t N( A.columns() );
2704 
      // Scalar converted to an intrinsic vector once (set() presumably broadcasts it to all lanes)
2705  const IntrinsicType factor( set( scalar ) );
2706 
2707  size_t i( 0UL );
2708 
      // Blocks of 8 intrinsic vectors (8*IT::size rows) per iteration.
      // NOTE(review): the accumulators are default-constructed and then added to; this assumes
      // IntrinsicType's default constructor yields a zero vector - confirm.
2709  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2710  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2711  for( size_t j=0UL; j<N; ++j ) {
2712  const IntrinsicType x1( set( x[j] ) );
2713  xmm1 = xmm1 + A.load(i ,j) * x1;
2714  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2715  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2716  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2717  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2718  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2719  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2720  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2721  }
2722  y.store( i , y.load(i ) + xmm1*factor );
2723  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2724  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2725  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2726  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
2727  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
2728  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
2729  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
2730  }
      // Blocks of 4 intrinsic vectors
2731  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2732  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2733  for( size_t j=0UL; j<N; ++j ) {
2734  const IntrinsicType x1( set( x[j] ) );
2735  xmm1 = xmm1 + A.load(i ,j) * x1;
2736  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2737  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2738  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2739  }
2740  y.store( i , y.load(i ) + xmm1*factor );
2741  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2742  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2743  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
2744  }
      // Blocks of 3 intrinsic vectors
2745  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
2746  IntrinsicType xmm1, xmm2, xmm3;
2747  for( size_t j=0UL; j<N; ++j ) {
2748  const IntrinsicType x1( set( x[j] ) );
2749  xmm1 = xmm1 + A.load(i ,j) * x1;
2750  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2751  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2752  }
2753  y.store( i , y.load(i ) + xmm1*factor );
2754  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
2755  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
2756  }
      // Blocks of 2 intrinsic vectors
2757  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2758  IntrinsicType xmm1, xmm2;
2759  for( size_t j=0UL; j<N; ++j ) {
2760  const IntrinsicType x1( set( x[j] ) );
2761  xmm1 = xmm1 + A.load(i ,j) * x1;
2762  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2763  }
2764  y.store( i , y.load(i ) + xmm1*factor );
2765  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
2766  }
      // Final single intrinsic vector. A full vector is loaded and stored even if fewer than
      // IT::size rows remain - presumably the operands are padded accordingly; TODO confirm.
2767  if( i < M ) {
2768  IntrinsicType xmm1;
2769  for( size_t j=0UL; j<N; ++j ) {
2770  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2771  }
2772  y.store( i, y.load(i) + xmm1*factor );
2773  }
2774  }
2775  //**********************************************************************************************
2776 
2777  //**BLAS-based addition assignment to dense vectors (default)***********************************
2791  template< typename VT1 // Type of the left-hand side target vector
2792  , typename MT1 // Type of the left-hand side matrix operand
2793  , typename VT2 // Type of the right-hand side vector operand
2794  , typename ST2 > // Type of the scalar value
2795  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2796  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2797  {
2798  selectDefaultAddAssignKernel( y, A, x, scalar );
2799  }
2800  //**********************************************************************************************
2801 
2802  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2803 #if BLAZE_BLAS_MODE
2804 
2817  template< typename VT1 // Type of the left-hand side target vector
2818  , typename MT1 // Type of the left-hand side matrix operand
2819  , typename VT2 // Type of the right-hand side vector operand
2820  , typename ST2 > // Type of the scalar value
2821  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2822  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2823  {
2824  using boost::numeric_cast;
2825 
2829 
2830  const int M ( numeric_cast<int>( A.rows() ) );
2831  const int N ( numeric_cast<int>( A.columns() ) );
2832  const int lda( numeric_cast<int>( A.spacing() ) );
2833 
2834  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2835  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2836  }
2837 #endif
2838  //**********************************************************************************************
2839 
2840  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2841 #if BLAZE_BLAS_MODE
2842 
2855  template< typename VT1 // Type of the left-hand side target vector
2856  , typename MT1 // Type of the left-hand side matrix operand
2857  , typename VT2 // Type of the right-hand side vector operand
2858  , typename ST2 > // Type of the scalar value
2859  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2860  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2861  {
2862  using boost::numeric_cast;
2863 
2867 
2868  const int M ( numeric_cast<int>( A.rows() ) );
2869  const int N ( numeric_cast<int>( A.columns() ) );
2870  const int lda( numeric_cast<int>( A.spacing() ) );
2871 
2872  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, scalar,
2873  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2874  }
2875 #endif
2876  //**********************************************************************************************
2877 
2878  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2879 #if BLAZE_BLAS_MODE
2880 
2893  template< typename VT1 // Type of the left-hand side target vector
2894  , typename MT1 // Type of the left-hand side matrix operand
2895  , typename VT2 // Type of the right-hand side vector operand
2896  , typename ST2 > // Type of the scalar value
2897  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2898  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2899  {
2900  using boost::numeric_cast;
2901 
2905  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2906  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2907  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2908 
2909  const int M ( numeric_cast<int>( A.rows() ) );
2910  const int N ( numeric_cast<int>( A.columns() ) );
2911  const int lda( numeric_cast<int>( A.spacing() ) );
2912  const complex<float> alpha( scalar );
2913  const complex<float> beta ( 1.0F, 0.0F );
2914 
2915  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2916  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2917  }
2918 #endif
2919  //**********************************************************************************************
2920 
2921  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2922 #if BLAZE_BLAS_MODE
2923 
2936  template< typename VT1 // Type of the left-hand side target vector
2937  , typename MT1 // Type of the left-hand side matrix operand
2938  , typename VT2 // Type of the right-hand side vector operand
2939  , typename ST2 > // Type of the scalar value
2940  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2941  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2942  {
2943  using boost::numeric_cast;
2944 
2948  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2949  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2950  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2951 
2952  const int M ( numeric_cast<int>( A.rows() ) );
2953  const int N ( numeric_cast<int>( A.columns() ) );
2954  const int lda( numeric_cast<int>( A.spacing() ) );
2955  const complex<double> alpha( scalar );
2956  const complex<double> beta ( 1.0, 0.0 );
2957 
2958  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2959  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2960  }
2961 #endif
2962  //**********************************************************************************************
2963 
2964  //**Addition assignment to sparse vectors*******************************************************
2965  // No special implementation for the addition assignment to sparse vectors.
2966  //**********************************************************************************************
2967 
2968  //**Subtraction assignment to dense vectors*****************************************************
2980  template< typename VT1 > // Type of the target dense vector
2981  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2982  {
2984 
2985  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2986 
2987  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2988  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2989 
2990  if( left.rows() == 0UL || left.columns() == 0UL ) {
2991  return;
2992  }
2993 
2994  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2995  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2996 
2997  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2998  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2999  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3000  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3001 
3002  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3003  }
3004  //**********************************************************************************************
3005 
3006  //**Subtraction assignment to dense vectors (kernel selection)**********************************
3017  template< typename VT1 // Type of the left-hand side target vector
3018  , typename MT1 // Type of the left-hand side matrix operand
3019  , typename VT2 // Type of the right-hand side vector operand
3020  , typename ST2 > // Type of the scalar value
3021  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3022  {
3023  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3024  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3025  DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
3026  else
3027  DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
3028  }
3029  //**********************************************************************************************
3030 
3031  //**Default subtraction assignment to dense vectors*********************************************
3045  template< typename VT1 // Type of the left-hand side target vector
3046  , typename MT1 // Type of the left-hand side matrix operand
3047  , typename VT2 // Type of the right-hand side vector operand
3048  , typename ST2 > // Type of the scalar value
3049  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3050  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3051  {
3052  y.subAssign( A * x * scalar );
3053  }
3054  //**********************************************************************************************
3055 
3056  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default kernel for the subtraction assignment of the scaled product: for each group
// of rows the terms A.load(row,j) * x[j] are accumulated over all columns j, and the accumulated
// vector multiplied by the scalar is subtracted from the current content of y (y.load() - ...).
// Enabled (via EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
//   y      : target vector (read via load(), written via store())
//   A      : matrix operand (read via load())
//   x      : vector operand
//   scalar : scaling factor
3070  template< typename VT1 // Type of the left-hand side target vector
3071  , typename MT1 // Type of the left-hand side matrix operand
3072  , typename VT2 // Type of the right-hand side vector operand
3073  , typename ST2 > // Type of the scalar value
3074  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3075  selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3076  {
3077  typedef IntrinsicTrait<ElementType> IT;
3078 
3079  const size_t M( A.rows() );
3080  const size_t N( A.columns() );
3081 
      // Scalar converted to an intrinsic vector once (set() presumably broadcasts it to all lanes)
3082  const IntrinsicType factor( set( scalar ) );
3083 
3084  size_t i( 0UL );
3085 
      // Blocks of 8 intrinsic vectors (8*IT::size rows) per iteration.
      // NOTE(review): the accumulators are default-constructed and then added to; this assumes
      // IntrinsicType's default constructor yields a zero vector - confirm.
3086  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3087  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3088  for( size_t j=0UL; j<N; ++j ) {
3089  const IntrinsicType x1( set( x[j] ) );
3090  xmm1 = xmm1 + A.load(i ,j) * x1;
3091  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3092  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3093  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3094  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3095  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3096  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3097  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3098  }
3099  y.store( i , y.load(i ) - xmm1*factor );
3100  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3101  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3102  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3103  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
3104  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
3105  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
3106  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
3107  }
      // Blocks of 4 intrinsic vectors
3108  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3109  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3110  for( size_t j=0UL; j<N; ++j ) {
3111  const IntrinsicType x1( set( x[j] ) );
3112  xmm1 = xmm1 + A.load(i ,j) * x1;
3113  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3114  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3115  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3116  }
3117  y.store( i , y.load(i ) - xmm1*factor );
3118  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3119  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3120  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3121  }
      // Blocks of 3 intrinsic vectors
3122  for( ; (i+IT::size*2UL) < M; i+=IT::size*3UL ) {
3123  IntrinsicType xmm1, xmm2, xmm3;
3124  for( size_t j=0UL; j<N; ++j ) {
3125  const IntrinsicType x1( set( x[j] ) );
3126  xmm1 = xmm1 + A.load(i ,j) * x1;
3127  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3128  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3129  }
3130  y.store( i , y.load(i ) - xmm1*factor );
3131  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3132  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3133  }
      // Blocks of 2 intrinsic vectors
3134  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3135  IntrinsicType xmm1, xmm2;
3136  for( size_t j=0UL; j<N; ++j ) {
3137  const IntrinsicType x1( set( x[j] ) );
3138  xmm1 = xmm1 + A.load(i ,j) * x1;
3139  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3140  }
3141  y.store( i , y.load(i ) - xmm1*factor );
3142  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
3143  }
      // Final single intrinsic vector. A full vector is loaded and stored even if fewer than
      // IT::size rows remain - presumably the operands are padded accordingly; TODO confirm.
3144  if( i < M ) {
3145  IntrinsicType xmm1;
3146  for( size_t j=0UL; j<N; ++j ) {
3147  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3148  }
3149  y.store( i, y.load(i) - xmm1*factor );
3150  }
3151  }
3152  //**********************************************************************************************
3153 
3154  //**BLAS-based subtraction assignment to dense vectors (default)********************************
3168  template< typename VT1 // Type of the left-hand side target vector
3169  , typename MT1 // Type of the left-hand side matrix operand
3170  , typename VT2 // Type of the right-hand side vector operand
3171  , typename ST2 > // Type of the scalar value
3172  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3173  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3174  {
3175  selectDefaultSubAssignKernel( y, A, x, scalar );
3176  }
3177  //**********************************************************************************************
3178 
3179  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
3180 #if BLAZE_BLAS_MODE
3181 
// Single precision BLAS kernel for the subtraction assignment.
//
// Selected through EnableIf when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Issues one cblas_sgemv call with alpha = -scalar and beta = 1.0F, i.e. the
// product of A and x, scaled by -scalar, is accumulated onto y.
//
//   y      : left-hand side target vector (updated in place through y.data())
//   A      : left-hand side matrix operand (read through A.data())
//   x      : right-hand side vector operand (read through x.data())
//   scalar : scalar value; its negation is passed as the BLAS alpha
3194  template< typename VT1 // Type of the left-hand side target vector
3195  , typename MT1 // Type of the left-hand side matrix operand
3196  , typename VT2 // Type of the right-hand side vector operand
3197  , typename ST2 > // Type of the scalar value
3198  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3199  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3200  {
3201  using boost::numeric_cast;
3202 
// NOTE(review): the embedded numbering jumps from 3202 to 3206; the lines in
// between are not part of this listing.
3206 
// The BLAS interface takes int arguments, so the dimensions and the spacing of A
// (used as leading dimension) are converted with boost::numeric_cast.
3207  const int M ( numeric_cast<int>( A.rows() ) );
3208  const int N ( numeric_cast<int>( A.columns() ) );
3209  const int lda( numeric_cast<int>( A.spacing() ) );
3210 
// A is handed over as a column-major, non-transposed matrix; both vectors use a
// stride of 1.
3211  cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
3212  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3213  }
3214 #endif
3215  //**********************************************************************************************
3216 
3217  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
3218 #if BLAZE_BLAS_MODE
3219 
// Double precision BLAS kernel for the subtraction assignment.
//
// Selected through EnableIf when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Issues one cblas_dgemv call with alpha = -scalar and beta = 1.0, i.e. the
// product of A and x, scaled by -scalar, is accumulated onto y.
//
//   y      : left-hand side target vector (updated in place through y.data())
//   A      : left-hand side matrix operand (read through A.data())
//   x      : right-hand side vector operand (read through x.data())
//   scalar : scalar value; its negation is passed as the BLAS alpha
3232  template< typename VT1 // Type of the left-hand side target vector
3233  , typename MT1 // Type of the left-hand side matrix operand
3234  , typename VT2 // Type of the right-hand side vector operand
3235  , typename ST2 > // Type of the scalar value
3236  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3237  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3238  {
3239  using boost::numeric_cast;
3240 
// NOTE(review): the embedded numbering jumps from 3240 to 3244; the lines in
// between are not part of this listing.
3244 
// The BLAS interface takes int arguments, so the dimensions and the spacing of A
// (used as leading dimension) are converted with boost::numeric_cast.
3245  const int M ( numeric_cast<int>( A.rows() ) );
3246  const int N ( numeric_cast<int>( A.columns() ) );
3247  const int lda( numeric_cast<int>( A.spacing() ) );
3248 
// A is handed over as a column-major, non-transposed matrix; both vectors use a
// stride of 1.
3249  cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, -scalar,
3250  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3251  }
3252 #endif
3253  //**********************************************************************************************
3254 
3255  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
3256 #if BLAZE_BLAS_MODE
3257 
// Single precision complex BLAS kernel for the subtraction assignment.
//
// Selected through EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2>
// holds. Issues one cblas_cgemv call with alpha = -scalar and beta = (1,0), i.e.
// the product of A and x, scaled by -scalar, is accumulated onto y.
//
//   y      : left-hand side target vector (updated in place through y.data())
//   A      : left-hand side matrix operand (read through A.data())
//   x      : right-hand side vector operand (read through x.data())
//   scalar : scalar value; its negation is converted to complex<float>
3270  template< typename VT1 // Type of the left-hand side target vector
3271  , typename MT1 // Type of the left-hand side matrix operand
3272  , typename VT2 // Type of the right-hand side vector operand
3273  , typename ST2 > // Type of the scalar value
3274  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3275  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3276  {
3277  using boost::numeric_cast;
3278 
// NOTE(review): the embedded numbering jumps from 3278 to 3282; the lines in
// between are not part of this listing.
// Compile time checks on the value_type of all three element types.
3282  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
3283  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
3284  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
3285 
// The BLAS interface takes int arguments, so the dimensions and the spacing of A
// (used as leading dimension) are converted with boost::numeric_cast.
3286  const int M ( numeric_cast<int>( A.rows() ) );
3287  const int N ( numeric_cast<int>( A.columns() ) );
3288  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex routine receives alpha and beta by address, hence the named locals.
3289  const complex<float> alpha( -scalar );
3290  const complex<float> beta ( 1.0F, 0.0F );
3291 
// A is handed over as a column-major, non-transposed matrix; both vectors use a
// stride of 1.
3292  cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
3293  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3294  }
3295 #endif
3296  //**********************************************************************************************
3297 
3298  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
3299 #if BLAZE_BLAS_MODE
3300 
// Double precision complex BLAS kernel for the subtraction assignment.
//
// Selected through EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2>
// holds. Issues one cblas_zgemv call with alpha = -scalar and beta = (1,0), i.e.
// the product of A and x, scaled by -scalar, is accumulated onto y.
//
//   y      : left-hand side target vector (updated in place through y.data())
//   A      : left-hand side matrix operand (read through A.data())
//   x      : right-hand side vector operand (read through x.data())
//   scalar : scalar value; its negation is converted to complex<double>
3313  template< typename VT1 // Type of the left-hand side target vector
3314  , typename MT1 // Type of the left-hand side matrix operand
3315  , typename VT2 // Type of the right-hand side vector operand
3316  , typename ST2 > // Type of the scalar value
3317  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3318  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3319  {
3320  using boost::numeric_cast;
3321 
// NOTE(review): the embedded numbering jumps from 3321 to 3325; the lines in
// between are not part of this listing.
// Compile time checks on the value_type of all three element types.
3325  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
3326  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
3327  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
3328 
// The BLAS interface takes int arguments, so the dimensions and the spacing of A
// (used as leading dimension) are converted with boost::numeric_cast.
3329  const int M ( numeric_cast<int>( A.rows() ) );
3330  const int N ( numeric_cast<int>( A.columns() ) );
3331  const int lda( numeric_cast<int>( A.spacing() ) );
// The complex routine receives alpha and beta by address, hence the named locals.
3332  const complex<double> alpha( -scalar );
3333  const complex<double> beta ( 1.0, 0.0 );
3334 
// A is handed over as a column-major, non-transposed matrix; both vectors use a
// stride of 1.
3335  cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
3336  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3337  }
3338 #endif
3339  //**********************************************************************************************
3340 
3341  //**Subtraction assignment to sparse vectors****************************************************
3342  // No special implementation for the subtraction assignment to sparse vectors.
3343  //**********************************************************************************************
3344 
3345  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the scaled product expression rhs to the dense
// vector lhs.
//
// rhs is first evaluated (serially) into a temporary of type ResultType; that
// temporary is then passed to the multAssign overload taking an evaluated vector.
//
//   lhs : target dense vector
//   rhs : expression to be multiplied onto lhs
3357  template< typename VT1 > // Type of the target dense vector
3358  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3359  {
// NOTE(review): the embedded numbering skips 3360 and 3362-3364; those lines are
// not part of this listing.
3361 
3365 
// Internal run time check that both sides have the same size.
3366  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3367 
// serial() forces the serial evaluation of the expression into the temporary.
3368  const ResultType tmp( serial( rhs ) );
3369  multAssign( ~lhs, tmp );
3370  }
3371  //**********************************************************************************************
3372 
3373  //**Multiplication assignment to sparse vectors*************************************************
3374  // No special implementation for the multiplication assignment to sparse vectors.
3375  //**********************************************************************************************
3376 
3377  //**SMP assignment to dense vectors**************************************************************
// SMP assignment of the scaled matrix/vector product rhs to the dense vector lhs.
//
// Only available when UseSMPAssign<VT1> holds (trait defined outside this excerpt).
// The matrix and vector operands of the wrapped product are evaluated into A and
// x, and the expression A * x * rhs.scalar_ is handed to smpAssign again.
//
//   lhs : target dense vector
//   rhs : scaled product expression to be assigned
3391  template< typename VT1 > // Type of the target dense vector
3392  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3393  smpAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3394  {
// NOTE(review): the embedded numbering skips 3395; that line is not part of this
// listing.
3396 
// Internal run time check that both sides have the same size.
3397  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3398 
// Operands of the matrix/vector product wrapped by the scaling expression.
3399  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3400  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3401 
// A matrix without rows leaves nothing to assign; a matrix without columns
// contributes no terms, so the target is only reset.
3402  if( left.rows() == 0UL ) {
3403  return;
3404  }
3405  else if( left.columns() == 0UL ) {
3406  reset( ~lhs );
3407  return;
3408  }
3409 
3410  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3411  RT x( right ); // Evaluation of the right-hand side dense vector operand
3412 
// Internal run time checks: evaluating the operands must not change their
// dimensions, and A must have as many rows as the target has elements.
3413  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3414  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3415  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3416  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3417 
// Rebuild the scaled product from the evaluated operands and assign it.
3418  smpAssign( ~lhs, A * x * rhs.scalar_ );
3419  }
3420  //**********************************************************************************************
3421 
3422  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the scaled product expression rhs to the sparse vector lhs.
//
// Only available when UseSMPAssign<VT1> holds (trait defined outside this excerpt).
// rhs is evaluated into a temporary of type ResultType, which is then passed to
// smpAssign.
//
//   lhs : target sparse vector
//   rhs : expression to be assigned
3436  template< typename VT1 > // Type of the target sparse vector
3437  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3438  smpAssign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3439  {
// NOTE(review): the embedded numbering skips 3440 and 3442-3444; those lines are
// not part of this listing.
3441 
3445 
// Internal run time check that both sides have the same size.
3446  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3447 
// Evaluate the expression once, then assign the evaluated result.
3448  const ResultType tmp( rhs );
3449  smpAssign( ~lhs, tmp );
3450  }
3451  //**********************************************************************************************
3452 
3453  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the scaled matrix/vector product rhs to the dense
// vector lhs.
//
// Only available when UseSMPAssign<VT1> holds (trait defined outside this excerpt).
// The matrix and vector operands of the wrapped product are evaluated into A and
// x, and the expression A * x * rhs.scalar_ is handed to smpAddAssign again.
//
//   lhs : target dense vector
//   rhs : scaled product expression to be added
3467  template< typename VT1 > // Type of the target dense vector
3468  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3469  smpAddAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3470  {
// NOTE(review): the embedded numbering skips 3471; that line is not part of this
// listing.
3472 
// Internal run time check that both sides have the same size.
3473  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3474 
// Operands of the matrix/vector product wrapped by the scaling expression.
3475  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3476  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3477 
// An empty matrix (no rows or no columns) leaves the target untouched.
3478  if( left.rows() == 0UL || left.columns() == 0UL ) {
3479  return;
3480  }
3481 
3482  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3483  RT x( right ); // Evaluation of the right-hand side dense vector operand
3484 
// Internal run time checks: evaluating the operands must not change their
// dimensions, and A must have as many rows as the target has elements.
3485  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3486  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3487  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3488  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3489 
// Rebuild the scaled product from the evaluated operands and add it to the target.
3490  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
3491  }
3492  //**********************************************************************************************
3493 
3494  //**SMP addition assignment to sparse vectors***************************************************
3495  // No special implementation for the SMP addition assignment to sparse vectors.
3496  //**********************************************************************************************
3497 
3498  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the scaled matrix/vector product rhs to the dense
// vector lhs.
//
// Only available when UseSMPAssign<VT1> holds (trait defined outside this excerpt).
// The matrix and vector operands of the wrapped product are evaluated into A and
// x, and the expression A * x * rhs.scalar_ is handed to smpSubAssign again.
//
//   lhs : target dense vector
//   rhs : scaled product expression to be subtracted
3512  template< typename VT1 > // Type of the target dense vector
3513  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3514  smpSubAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3515  {
// NOTE(review): the embedded numbering skips 3516; that line is not part of this
// listing.
3517 
// Internal run time check that both sides have the same size.
3518  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3519 
// Operands of the matrix/vector product wrapped by the scaling expression.
3520  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3521  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3522 
// An empty matrix (no rows or no columns) leaves the target untouched.
3523  if( left.rows() == 0UL || left.columns() == 0UL ) {
3524  return;
3525  }
3526 
3527  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3528  RT x( right ); // Evaluation of the right-hand side dense vector operand
3529 
// Internal run time checks: evaluating the operands must not change their
// dimensions, and A must have as many rows as the target has elements.
3530  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3531  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3532  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3533  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3534 
// Rebuild the scaled product from the evaluated operands and subtract it from the
// target.
3535  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
3536  }
3537  //**********************************************************************************************
3538 
3539  //**SMP subtraction assignment to sparse vectors************************************************
3540  // No special implementation for the SMP subtraction assignment to sparse vectors.
3541  //**********************************************************************************************
3542 
3543  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the scaled product expression rhs to the dense
// vector lhs.
//
// Only available when UseSMPAssign<VT1> holds (trait defined outside this excerpt).
// rhs is evaluated into a temporary of type ResultType, which is then passed to
// smpMultAssign.
//
//   lhs : target dense vector
//   rhs : expression to be multiplied onto lhs
3558  template< typename VT1 > // Type of the target dense vector
3559  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3560  smpMultAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3561  {
// NOTE(review): the embedded numbering skips 3562 and 3564-3566; those lines are
// not part of this listing.
3563 
3567 
// Internal run time check that both sides have the same size.
3568  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3569 
// Evaluate the expression once, then multiply the evaluated result onto the target.
3570  const ResultType tmp( rhs );
3571  smpMultAssign( ~lhs, tmp );
3572  }
3573  //**********************************************************************************************
3574 
3575  //**SMP multiplication assignment to sparse vectors*********************************************
3576  // No special implementation for the SMP multiplication assignment to sparse vectors.
3577  //**********************************************************************************************
3578 
3579  //**Compile time checks*************************************************************************
3588  //**********************************************************************************************
3589 };
3591 //*************************************************************************************************
3592 
3593 
3594 
3595 
3596 //=================================================================================================
3597 //
3598 // GLOBAL BINARY ARITHMETIC OPERATORS
3599 //
3600 //=================================================================================================
3601 
3602 //*************************************************************************************************
3633 template< typename T1 // Type of the left-hand side dense matrix
3634  , typename T2 > // Type of the right-hand side dense vector
3635 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
3637 {
3639 
3640  if( (~mat).columns() != (~vec).size() )
3641  throw std::invalid_argument( "Matrix and vector sizes do not match" );
3642 
3643  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
3644 }
3645 //*************************************************************************************************
3646 
3647 
3648 
3649 
3650 //=================================================================================================
3651 //
3652 // EXPRESSION TRAIT SPECIALIZATIONS
3653 //
3654 //=================================================================================================
3655 
3656 //*************************************************************************************************
3658 template< typename MT, typename VT, bool AF >
3659 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT>, AF >
3660 {
3661  public:
3662  //**********************************************************************************************
3663  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
3664  //**********************************************************************************************
3665 };
3667 //*************************************************************************************************
3668 
3669 } // namespace blaze
3670 
3671 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4329
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:257
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:111
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:152
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:251
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:281
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
Header file for the DenseVector base class.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:244
Compile time check for double precision floating point types.This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:397
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications.The TDMatDVecMultExpr class...
Definition: Forward.h:126
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:253
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:254
Header file for the multiplication trait.
Header file for the IsDouble type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:398
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:110
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:333
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDMatDVecMultExpr.h:399
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type.In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:247
Constraints on the storage order of matrix types.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:248
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:260
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:323
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:367
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:112
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:243
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:246
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold.This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:355
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:331
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold.This threshold specifies when a c...
Definition: Thresholds.h:345
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:113
Header file for all intrinsic functionality.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:245
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:296
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:242
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:170
Header file for basic type definitions.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:115
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types.This type trait tests whether or not the...
Definition: IsFloat.h:75
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:387
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:377
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:343
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:114
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.