All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
65 #include <blaze/system/BLAS.h>
67 #include <blaze/util/Assert.h>
68 #include <blaze/util/Complex.h>
74 #include <blaze/util/DisableIf.h>
75 #include <blaze/util/EnableIf.h>
77 #include <blaze/util/SelectType.h>
78 #include <blaze/util/Types.h>
84 
85 
86 namespace blaze {
87 
88 //=================================================================================================
89 //
90 // CLASS TDVECDMATMULTEXPR
91 //
92 //=================================================================================================
93 
94 //*************************************************************************************************
101 template< typename VT // Type of the left-hand side dense vector
102  , typename MT > // Type of the right-hand side dense matrix
103 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
104  , private TVecMatMultExpr
105  , private Computation
106 {
107  private:
108  //**Type definitions****************************************************************************
109  typedef typename VT::ResultType VRT;
110  typedef typename MT::ResultType MRT;
111  typedef typename VRT::ElementType VET;
112  typedef typename MRT::ElementType MET;
113  typedef typename VT::CompositeType VCT;
114  typedef typename MT::CompositeType MCT;
115  //**********************************************************************************************
116 
117  //**********************************************************************************************
119  enum { evaluateVector = IsComputation<VT>::value };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
126  //**********************************************************************************************
127 
128  //**********************************************************************************************
130 
134  template< typename T1, typename T2, typename T3 >
135  struct UseSinglePrecisionKernel {
136  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
137  IsFloat<typename T1::ElementType>::value &&
138  IsFloat<typename T2::ElementType>::value &&
139  IsFloat<typename T3::ElementType>::value };
140  };
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146 
150  template< typename T1, typename T2, typename T3 >
151  struct UseDoublePrecisionKernel {
152  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
153  IsDouble<typename T1::ElementType>::value &&
154  IsDouble<typename T2::ElementType>::value &&
155  IsDouble<typename T3::ElementType>::value };
156  };
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
166  template< typename T1, typename T2, typename T3 >
167  struct UseSinglePrecisionComplexKernel {
168  typedef complex<float> Type;
169  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170  IsSame<typename T1::ElementType,Type>::value &&
171  IsSame<typename T2::ElementType,Type>::value &&
172  IsSame<typename T3::ElementType,Type>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
183  template< typename T1, typename T2, typename T3 >
184  struct UseDoublePrecisionComplexKernel {
185  typedef complex<double> Type;
186  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  IsSame<typename T1::ElementType,Type>::value &&
188  IsSame<typename T2::ElementType,Type>::value &&
189  IsSame<typename T3::ElementType,Type>::value };
190  };
192  //**********************************************************************************************
193 
194  //**********************************************************************************************
196 
199  template< typename T1, typename T2, typename T3 >
200  struct UseDefaultKernel {
201  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
202  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
203  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
204  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
215  template< typename T1, typename T2, typename T3 >
216  struct UseVectorizedDefaultKernel {
217  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
218  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
219  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
220  IntrinsicTrait<typename T1::ElementType>::addition &&
221  IntrinsicTrait<typename T1::ElementType>::multiplication };
222  };
224  //**********************************************************************************************
225 
226  public:
227  //**Type definitions****************************************************************************
233  typedef const ElementType ReturnType;
234  typedef const ResultType CompositeType;
235 
237  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
238 
240  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
241 
244 
247  //**********************************************************************************************
248 
249  //**Compilation flags***************************************************************************
// Compilation flag: this expression type does not advertise itself as vectorizable.
enum { vectorizable = 0 };

// Compilation flag: this expression type does not advertise itself as SMP-assignable.
enum { smpAssignable = 0 };
255  //**********************************************************************************************
256 
257  //**Constructor*********************************************************************************
263  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
264  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
265  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
266  , end_( ( (mat.rows()-1UL) & size_t(-2) ) + 1UL ) // End of the unrolled calculation loop
267  {
268  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
269  }
270  //**********************************************************************************************
271 
272  //**Subscript operator**************************************************************************
278  inline ReturnType operator[]( size_t index ) const {
279  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
280 
281  ElementType res;
282 
283  if( mat_.rows() != 0UL ) {
284  res = vec_[0UL] * mat_(0UL,index);
285  for( size_t j=1UL; j<end_; j+=2UL ) {
286  res += vec_[j] * mat_(j,index) + vec_[j+1UL] * mat_(j+1UL,index);
287  }
288  if( end_ < mat_.rows() ) {
289  res += vec_[end_] * mat_(end_,index);
290  }
291  }
292  else {
293  reset( res );
294  }
295 
296  return res;
297  }
298  //**********************************************************************************************
299 
300  //**Size function*******************************************************************************
305  inline size_t size() const {
306  return mat_.columns();
307  }
308  //**********************************************************************************************
309 
310  //**Left operand access*************************************************************************
315  inline LeftOperand leftOperand() const {
316  return vec_;
317  }
318  //**********************************************************************************************
319 
320  //**Right operand access************************************************************************
325  inline RightOperand rightOperand() const {
326  return mat_;
327  }
328  //**********************************************************************************************
329 
330  //**********************************************************************************************
336  template< typename T >
337  inline bool canAlias( const T* alias ) const {
338  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
339  }
340  //**********************************************************************************************
341 
342  //**********************************************************************************************
348  template< typename T >
349  inline bool isAliased( const T* alias ) const {
350  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
351  }
352  //**********************************************************************************************
353 
354  private:
355  //**Member variables****************************************************************************
358  const size_t end_;
359  //**********************************************************************************************
360 
361  //**Assignment to dense vectors*****************************************************************
374  template< typename VT1 > // Type of the target dense vector
375  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
376  {
378 
379  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
380 
381  if( rhs.mat_.rows() == 0UL ) {
382  reset( ~lhs );
383  return;
384  }
385  else if( rhs.mat_.columns() == 0UL ) {
386  return;
387  }
388 
389  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
390  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
391 
392  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
393  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
394  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
395  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
396 
397  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
398  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
399  TDVecDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
400  else
401  TDVecDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
402  }
404  //**********************************************************************************************
405 
406  //**Default assignment to dense vectors*********************************************************
420  template< typename VT1 // Type of the left-hand side target vector
421  , typename VT2 // Type of the left-hand side vector operand
422  , typename MT1 > // Type of the right-hand side matrix operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
424  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
425  {
426  const size_t M( A.rows() );
427  const size_t N( A.columns() );
428 
429  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
430  const size_t jend( N & size_t(-2) );
431 
432  for( size_t j=0UL; j<N; ++j ) {
433  y[j] = x[0UL] * A(0UL,j);
434  }
435  for( size_t i=1UL; i<M; ++i ) {
436  for( size_t j=0UL; j<jend; j+=2UL ) {
437  y[j ] += x[i] * A(i,j );
438  y[j+1UL] += x[i] * A(i,j+1UL);
439  }
440  if( jend < N ) {
441  y[jend] += x[i] * A(i,jend);
442  }
443  }
444  }
446  //**********************************************************************************************
447 
448  //**Vectorized default assignment to dense vectors**********************************************
462  template< typename VT1 // Type of the left-hand side target vector
463  , typename VT2 // Type of the left-hand side vector operand
464  , typename MT1 > // Type of the right-hand side matrix operand
465  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
466  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
467  {
468  typedef IntrinsicTrait<ElementType> IT;
469 
470  const size_t M( A.rows() );
471  const size_t N( A.columns() );
472 
473  size_t j( 0UL );
474 
475  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
476  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
477  for( size_t i=0UL; i<M; ++i ) {
478  const IntrinsicType x1( set( x[i] ) );
479  xmm1 = xmm1 + x1 * A.load(i,j );
480  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
481  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
482  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
483  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
484  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
485  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
486  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
487  }
488  y.store( j , xmm1 );
489  y.store( j+IT::size , xmm2 );
490  y.store( j+IT::size*2UL, xmm3 );
491  y.store( j+IT::size*3UL, xmm4 );
492  y.store( j+IT::size*4UL, xmm5 );
493  y.store( j+IT::size*5UL, xmm6 );
494  y.store( j+IT::size*6UL, xmm7 );
495  y.store( j+IT::size*7UL, xmm8 );
496  }
497  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
498  IntrinsicType xmm1, xmm2, xmm3, xmm4;
499  for( size_t i=0UL; i<M; ++i ) {
500  const IntrinsicType x1( set( x[i] ) );
501  xmm1 = xmm1 + x1 * A.load(i,j );
502  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
503  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
504  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
505  }
506  y.store( j , xmm1 );
507  y.store( j+IT::size , xmm2 );
508  y.store( j+IT::size*2UL, xmm3 );
509  y.store( j+IT::size*3UL, xmm4 );
510  }
511  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
512  IntrinsicType xmm1, xmm2, xmm3;
513  for( size_t i=0UL; i<M; ++i ) {
514  const IntrinsicType x1( set( x[i] ) );
515  xmm1 = xmm1 + x1 * A.load(i,j );
516  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
517  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
518  }
519  y.store( j , xmm1 );
520  y.store( j+IT::size , xmm2 );
521  y.store( j+IT::size*2UL, xmm3 );
522  }
523  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
524  IntrinsicType xmm1, xmm2;
525  for( size_t i=0UL; i<M; ++i ) {
526  const IntrinsicType x1( set( x[i] ) );
527  xmm1 = xmm1 + x1 * A.load(i,j );
528  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
529  }
530  y.store( j , xmm1 );
531  y.store( j+IT::size, xmm2 );
532  }
533  if( j < N ) {
534  IntrinsicType xmm1;
535  for( size_t i=0UL; i<M; ++i ) {
536  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
537  }
538  y.store( j, xmm1 );
539  }
540  }
542  //**********************************************************************************************
543 
544  //**BLAS-based assignment to dense vectors (default)********************************************
558  template< typename VT1 // Type of the left-hand side target vector
559  , typename VT2 // Type of the left-hand side vector operand
560  , typename MT1 > // Type of the right-hand side matrix operand
561  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
562  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
563  {
564  selectDefaultAssignKernel( y, x, A );
565  }
567  //**********************************************************************************************
568 
569  //**BLAS-based assignment to dense vectors (single precision)***********************************
570 #if BLAZE_BLAS_MODE
571 
584  template< typename VT1 // Type of the left-hand side target vector
585  , typename VT2 // Type of the left-hand side vector operand
586  , typename MT1 > // Type of the right-hand side matrix operand
587  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
588  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
589  {
590  using boost::numeric_cast;
591 
595 
596  const int M ( numeric_cast<int>( A.rows() ) );
597  const int N ( numeric_cast<int>( A.columns() ) );
598  const int lda( numeric_cast<int>( A.spacing() ) );
599 
600  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
601  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
602  }
604 #endif
605  //**********************************************************************************************
606 
607  //**BLAS-based assignment to dense vectors (double precision)***********************************
608 #if BLAZE_BLAS_MODE
609 
622  template< typename VT1 // Type of the left-hand side target vector
623  , typename VT2 // Type of the left-hand side vector operand
624  , typename MT1 > // Type of the right-hand side matrix operand
625  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
626  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
627  {
628  using boost::numeric_cast;
629 
633 
634  const int M ( numeric_cast<int>( A.rows() ) );
635  const int N ( numeric_cast<int>( A.columns() ) );
636  const int lda( numeric_cast<int>( A.spacing() ) );
637 
638  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
639  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
640  }
642 #endif
643  //**********************************************************************************************
644 
645  //**BLAS-based assignment to dense vectors (single precision complex)***************************
646 #if BLAZE_BLAS_MODE
647 
660  template< typename VT1 // Type of the left-hand side target vector
661  , typename VT2 // Type of the left-hand side vector operand
662  , typename MT1 > // Type of the right-hand side matrix operand
663  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
664  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
665  {
666  using boost::numeric_cast;
667 
671  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
672  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
673  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
674 
675  const int M ( numeric_cast<int>( A.rows() ) );
676  const int N ( numeric_cast<int>( A.columns() ) );
677  const int lda( numeric_cast<int>( A.spacing() ) );
678  const complex<float> alpha( 1.0F, 0.0F );
679  const complex<float> beta ( 0.0F, 0.0F );
680 
681  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
682  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
683  }
685 #endif
686  //**********************************************************************************************
687 
688  //**BLAS-based assignment to dense vectors (double precision complex)***************************
689 #if BLAZE_BLAS_MODE
690 
703  template< typename VT1 // Type of the left-hand side target vector
704  , typename VT2 // Type of the left-hand side vector operand
705  , typename MT1 > // Type of the right-hand side matrix operand
706  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
707  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
708  {
709  using boost::numeric_cast;
710 
714  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
715  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
716  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
717 
718  const int M ( numeric_cast<int>( A.rows() ) );
719  const int N ( numeric_cast<int>( A.columns() ) );
720  const int lda( numeric_cast<int>( A.spacing() ) );
721  const complex<double> alpha( 1.0, 0.0 );
722  const complex<double> beta ( 0.0, 0.0 );
723 
724  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
725  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
726  }
728 #endif
729  //**********************************************************************************************
730 
731  //**Assignment to sparse vectors****************************************************************
744  template< typename VT1 > // Type of the target sparse vector
745  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
746  {
748 
752 
753  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
754 
755  const ResultType tmp( rhs );
756  assign( ~lhs, tmp );
757  }
759  //**********************************************************************************************
760 
761  //**Addition assignment to dense vectors********************************************************
774  template< typename VT1 > // Type of the target dense vector
775  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
776  {
778 
779  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
780 
781  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
782  return;
783  }
784 
785  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
786  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
787 
788  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
789  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
790  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
791  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
792 
793  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
794  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
795  TDVecDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
796  else
797  TDVecDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
798  }
800  //**********************************************************************************************
801 
802  //**Default addition assignment to dense vectors************************************************
816  template< typename VT1 // Type of the left-hand side target vector
817  , typename VT2 // Type of the left-hand side vector operand
818  , typename MT1 > // Type of the right-hand side matrix operand
819  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
820  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
821  {
822  const size_t M( A.rows() );
823  const size_t N( A.columns() );
824 
825  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
826  const size_t jend( N & size_t(-2) );
827 
828  for( size_t i=0UL; i<M; ++i ) {
829  for( size_t j=0UL; j<jend; j+=2UL ) {
830  y[j ] += x[i] * A(i,j );
831  y[j+1UL] += x[i] * A(i,j+1UL);
832  }
833  if( jend < N ) {
834  y[jend] += x[i] * A(i,jend);
835  }
836  }
837  }
839  //**********************************************************************************************
840 
841  //**Vectorized default addition assignment to dense vectors*************************************
855  template< typename VT1 // Type of the left-hand side target vector
856  , typename VT2 // Type of the left-hand side vector operand
857  , typename MT1 > // Type of the right-hand side matrix operand
858  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
859  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
860  {
861  typedef IntrinsicTrait<ElementType> IT;
862 
863  const size_t M( A.rows() );
864  const size_t N( A.columns() );
865 
866  size_t j( 0UL );
867 
868  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
869  IntrinsicType xmm1( y.load(j ) );
870  IntrinsicType xmm2( y.load(j+IT::size ) );
871  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
872  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
873  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
874  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
875  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
876  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
877  for( size_t i=0UL; i<M; ++i ) {
878  const IntrinsicType x1( set( x[i] ) );
879  xmm1 = xmm1 + x1 * A.load(i,j );
880  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
881  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
882  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
883  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
884  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
885  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
886  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
887  }
888  y.store( j , xmm1 );
889  y.store( j+IT::size , xmm2 );
890  y.store( j+IT::size*2UL, xmm3 );
891  y.store( j+IT::size*3UL, xmm4 );
892  y.store( j+IT::size*4UL, xmm5 );
893  y.store( j+IT::size*5UL, xmm6 );
894  y.store( j+IT::size*6UL, xmm7 );
895  y.store( j+IT::size*7UL, xmm8 );
896  }
897  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
898  IntrinsicType xmm1( y.load(j ) );
899  IntrinsicType xmm2( y.load(j+IT::size ) );
900  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
901  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
902  for( size_t i=0UL; i<M; ++i ) {
903  const IntrinsicType x1( set( x[i] ) );
904  xmm1 = xmm1 + x1 * A.load(i,j );
905  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
906  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
907  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
908  }
909  y.store( j , xmm1 );
910  y.store( j+IT::size , xmm2 );
911  y.store( j+IT::size*2UL, xmm3 );
912  y.store( j+IT::size*3UL, xmm4 );
913  }
914  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
915  IntrinsicType xmm1( y.load(j ) );
916  IntrinsicType xmm2( y.load(j+IT::size ) );
917  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
918  for( size_t i=0UL; i<M; ++i ) {
919  const IntrinsicType x1( set( x[i] ) );
920  xmm1 = xmm1 + x1 * A.load(i,j );
921  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
922  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
923  }
924  y.store( j , xmm1 );
925  y.store( j+IT::size , xmm2 );
926  y.store( j+IT::size*2UL, xmm3 );
927  }
928  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
929  IntrinsicType xmm1( y.load(j ) );
930  IntrinsicType xmm2( y.load(j+IT::size) );
931  for( size_t i=0UL; i<M; ++i ) {
932  const IntrinsicType x1( set( x[i] ) );
933  xmm1 = xmm1 + x1 * A.load(i,j );
934  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
935  }
936  y.store( j , xmm1 );
937  y.store( j+IT::size, xmm2 );
938  }
939  if( j < N ) {
940  IntrinsicType xmm1( y.load(j) );
941  for( size_t i=0UL; i<M; ++i ) {
942  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
943  }
944  y.store( j, xmm1 );
945  }
946  }
948  //**********************************************************************************************
949 
950  //**BLAS-based addition assignment to dense vectors (default)***********************************
964  template< typename VT1 // Type of the left-hand side target vector
965  , typename VT2 // Type of the left-hand side vector operand
966  , typename MT1 > // Type of the right-hand side matrix operand
967  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
968  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
969  {
970  selectDefaultAddAssignKernel( y, x, A );
971  }
973  //**********************************************************************************************
974 
975  //**BLAS-based addition assignment to dense vectors (single precision)**************************
976 #if BLAZE_BLAS_MODE
977 
990  template< typename VT1 // Type of the left-hand side target vector
991  , typename VT2 // Type of the left-hand side vector operand
992  , typename MT1 > // Type of the right-hand side matrix operand
993  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
994  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
995  {
996  using boost::numeric_cast;
997 
1001 
1002  const int M ( numeric_cast<int>( A.rows() ) );
1003  const int N ( numeric_cast<int>( A.columns() ) );
1004  const int lda( numeric_cast<int>( A.spacing() ) );
1005 
1006  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
1007  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1008  }
1010 #endif
1011  //**********************************************************************************************
1012 
1013  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1014 #if BLAZE_BLAS_MODE
1015 
1028  template< typename VT1 // Type of the left-hand side target vector
1029  , typename VT2 // Type of the left-hand side vector operand
1030  , typename MT1 > // Type of the right-hand side matrix operand
1031  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1032  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1033  {
1034  using boost::numeric_cast;
1035 
1039 
1040  const int M ( numeric_cast<int>( A.rows() ) );
1041  const int N ( numeric_cast<int>( A.columns() ) );
1042  const int lda( numeric_cast<int>( A.spacing() ) );
1043 
1044  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1045  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1046  }
1048 #endif
1049  //**********************************************************************************************
1050 
1051  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1052 #if BLAZE_BLAS_MODE
1053 
1066  template< typename VT1 // Type of the left-hand side target vector
1067  , typename VT2 // Type of the left-hand side vector operand
1068  , typename MT1 > // Type of the right-hand side matrix operand
1069  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1070  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1071  {
1072  using boost::numeric_cast;
1073 
1077  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1078  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1079  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1080 
1081  const int M ( numeric_cast<int>( A.rows() ) );
1082  const int N ( numeric_cast<int>( A.columns() ) );
1083  const int lda( numeric_cast<int>( A.spacing() ) );
1084  const complex<float> alpha( 1.0F, 0.0F );
1085  const complex<float> beta ( 1.0F, 0.0F );
1086 
1087  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1088  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1089  }
1091 #endif
1092  //**********************************************************************************************
1093 
1094  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1095 #if BLAZE_BLAS_MODE
1096 
1109  template< typename VT1 // Type of the left-hand side target vector
1110  , typename VT2 // Type of the left-hand side vector operand
1111  , typename MT1 > // Type of the right-hand side matrix operand
1112  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1113  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1114  {
      // BLAS kernel for the addition assignment of the vector/matrix product to y.
      // This overload is only enabled when UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
      // holds; the whole update is delegated to a single cblas_zgemv call.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1115  using boost::numeric_cast;
1116 
      // The value type wrapped by the complex elements of all three operands must be double.
1120  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1121  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1122  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1123 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
1124  const int M ( numeric_cast<int>( A.rows() ) );
1125  const int N ( numeric_cast<int>( A.columns() ) );
1126  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = 1 and beta = 1 (gemv computes alpha*op(A)*x + beta*y), so the product
      // is added to the current content of y.
1127  const complex<double> alpha( 1.0, 0.0 );
1128  const complex<double> beta ( 1.0, 0.0 );
1129 
      // A is passed as a row-major M x N matrix with the transpose flag set;
      // x and y are passed with an increment of 1.
1130  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1131  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1132  }
1134 #endif
1135  //**********************************************************************************************
1136 
1137  //**Addition assignment to sparse vectors*******************************************************
1138  // No special implementation for the addition assignment to sparse vectors.
1139  //**********************************************************************************************
1140 
1141  //**Subtraction assignment to dense vectors*****************************************************
1154  template< typename VT1 > // Type of the target dense vector
1155  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1156  {
      // Subtraction assignment of the vector/matrix product held by rhs to the dense
      // vector lhs. Evaluates the two operands and dispatches to either the default
      // or the BLAS-based subtraction kernel.
      //   lhs  Target dense vector; its size must equal rhs.size().
      //   rhs  Multiplication expression to be subtracted.
1158 
1159  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1160 
      // With an empty matrix operand there is nothing to subtract; lhs stays untouched.
1161  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1162  return;
1163  }
1164 
1165  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
1166  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
1167 
1168  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1169  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1170  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1171  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1172 
      // The default kernel is used when the matrix operand is a computation that was not
      // evaluated, or when the matrix has fewer than TDVECDMATMULT_THRESHOLD elements;
      // otherwise the BLAS kernel selection takes over.
1173  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1174  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1175  TDVecDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1176  else
1177  TDVecDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
1178  }
1180  //**********************************************************************************************
1181 
1182  //**Default subtraction assignment to dense vectors*********************************************
1196  template< typename VT1 // Type of the left-hand side target vector
1197  , typename VT2 // Type of the left-hand side vector operand
1198  , typename MT1 > // Type of the right-hand side matrix operand
1199  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1200  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1201  {
      // Non-vectorized default kernel for the subtraction assignment: for every matrix
      // element, y[j] -= x[i] * A(i,j). Enabled only when UseVectorizedDefaultKernel
      // is NOT satisfied for the given operand types.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1202  const size_t M( A.rows() );
1203  const size_t N( A.columns() );
1204 
      // jend is N rounded down to the next even number (lowest bit masked out).
1205  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1206  const size_t jend( N & size_t(-2) );
1207 
1208  for( size_t i=0UL; i<M; ++i ) {
      // Inner loop over the columns, manually unrolled by a factor of two.
1209  for( size_t j=0UL; j<jend; j+=2UL ) {
1210  y[j ] -= x[i] * A(i,j );
1211  y[j+1UL] -= x[i] * A(i,j+1UL);
1212  }
      // Remaining last column in case N is odd.
1213  if( jend < N ) {
1214  y[jend] -= x[i] * A(i,jend);
1215  }
1216  }
1217  }
1219  //**********************************************************************************************
1220 
1221  //**Vectorized default subtraction assignment to dense vectors**********************************
1235  template< typename VT1 // Type of the left-hand side target vector
1236  , typename VT2 // Type of the left-hand side vector operand
1237  , typename MT1 > // Type of the right-hand side matrix operand
1238  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1239  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1240  {
      // Vectorized default kernel for the subtraction assignment. The columns are
      // processed in chunks of 8, 4, 3, 2 and finally 1 IntrinsicType values; for each
      // chunk the current values of y are loaded, x[i] * A(i,j..) is subtracted for
      // every row i, and the result is stored back to y.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
      // NOTE(review): each loop condition only checks that the START of the last
      // IntrinsicType of the chunk is below N, so loads/stores can reach beyond N.
      // This presumably relies on y and A being padded to a multiple of IT::size --
      // confirm against the storage classes.
      // NOTE(review): the kernel needs operator- on IntrinsicType; verify that the
      // enabling trait (defined outside this excerpt) covers subtraction.
1241  typedef IntrinsicTrait<ElementType> IT;
1242 
1243  const size_t M( A.rows() );
1244  const size_t N( A.columns() );
1245 
      // Column index shared by all of the following loops; each loop continues where
      // the previous one stopped.
1246  size_t j( 0UL );
1247 
      // Chunks of 8 IntrinsicType values.
1248  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1249  IntrinsicType xmm1( y.load(j ) );
1250  IntrinsicType xmm2( y.load(j+IT::size ) );
1251  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1252  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1253  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1254  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1255  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1256  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1257  for( size_t i=0UL; i<M; ++i ) {
      // x[i] is expanded into an IntrinsicType once per row and reused for all columns.
1258  const IntrinsicType x1( set( x[i] ) );
1259  xmm1 = xmm1 - x1 * A.load(i,j );
1260  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1261  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1262  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1263  xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1264  xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1265  xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1266  xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1267  }
1268  y.store( j , xmm1 );
1269  y.store( j+IT::size , xmm2 );
1270  y.store( j+IT::size*2UL, xmm3 );
1271  y.store( j+IT::size*3UL, xmm4 );
1272  y.store( j+IT::size*4UL, xmm5 );
1273  y.store( j+IT::size*5UL, xmm6 );
1274  y.store( j+IT::size*6UL, xmm7 );
1275  y.store( j+IT::size*7UL, xmm8 );
1276  }
      // Chunks of 4 IntrinsicType values.
1277  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1278  IntrinsicType xmm1( y.load(j ) );
1279  IntrinsicType xmm2( y.load(j+IT::size ) );
1280  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1281  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1282  for( size_t i=0UL; i<M; ++i ) {
1283  const IntrinsicType x1( set( x[i] ) );
1284  xmm1 = xmm1 - x1 * A.load(i,j );
1285  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1286  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1287  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1288  }
1289  y.store( j , xmm1 );
1290  y.store( j+IT::size , xmm2 );
1291  y.store( j+IT::size*2UL, xmm3 );
1292  y.store( j+IT::size*3UL, xmm4 );
1293  }
      // Chunks of 3 IntrinsicType values.
1294  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1295  IntrinsicType xmm1( y.load(j ) );
1296  IntrinsicType xmm2( y.load(j+IT::size ) );
1297  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1298  for( size_t i=0UL; i<M; ++i ) {
1299  const IntrinsicType x1( set( x[i] ) );
1300  xmm1 = xmm1 - x1 * A.load(i,j );
1301  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1302  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1303  }
1304  y.store( j , xmm1 );
1305  y.store( j+IT::size , xmm2 );
1306  y.store( j+IT::size*2UL, xmm3 );
1307  }
      // Chunks of 2 IntrinsicType values.
1308  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1309  IntrinsicType xmm1( y.load(j ) );
1310  IntrinsicType xmm2( y.load(j+IT::size) );
1311  for( size_t i=0UL; i<M; ++i ) {
1312  const IntrinsicType x1( set( x[i] ) );
1313  xmm1 = xmm1 - x1 * A.load(i,j );
1314  xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
1315  }
1316  y.store( j , xmm1 );
1317  y.store( j+IT::size, xmm2 );
1318  }
      // At most one IntrinsicType is left at this point.
1319  if( j < N ) {
1320  IntrinsicType xmm1( y.load(j) );
1321  for( size_t i=0UL; i<M; ++i ) {
1322  xmm1 = xmm1 - set( x[i] ) * A.load(i,j);
1323  }
1324  y.store( j, xmm1 );
1325  }
1326  }
1328  //**********************************************************************************************
1329 
1330  //**BLAS-based subtraction assignment to dense vectors (default)********************************
1344  template< typename VT1 // Type of the left-hand side target vector
1345  , typename VT2 // Type of the left-hand side vector operand
1346  , typename MT1 > // Type of the right-hand side matrix operand
1347  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1348  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1349  {
      // Fallback of the BLAS kernel selection: enabled when UseDefaultKernel<VT1,VT2,MT1>
      // holds, in which case the call is simply forwarded to the default subtraction
      // assignment kernel.
1350  selectDefaultSubAssignKernel( y, x, A );
1351  }
1353  //**********************************************************************************************
1354 
1355  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
1356 #if BLAZE_BLAS_MODE
1357 
1370  template< typename VT1 // Type of the left-hand side target vector
1371  , typename VT2 // Type of the left-hand side vector operand
1372  , typename MT1 > // Type of the right-hand side matrix operand
1373  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1374  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1375  {
      // BLAS kernel for the subtraction assignment of the vector/matrix product to y.
      // Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1> holds; the update is
      // delegated to a single cblas_sgemv call.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1376  using boost::numeric_cast;
1377 
1381 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
1382  const int M ( numeric_cast<int>( A.rows() ) );
1383  const int N ( numeric_cast<int>( A.columns() ) );
1384  const int lda( numeric_cast<int>( A.spacing() ) );
1385 
      // alpha = -1 and beta = 1 (gemv computes alpha*op(A)*x + beta*y), so the product
      // is subtracted from the current content of y.
1386  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1387  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
1388  }
1390 #endif
1391  //**********************************************************************************************
1392 
1393  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
1394 #if BLAZE_BLAS_MODE
1395 
1408  template< typename VT1 // Type of the left-hand side target vector
1409  , typename VT2 // Type of the left-hand side vector operand
1410  , typename MT1 > // Type of the right-hand side matrix operand
1411  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1412  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1413  {
      // BLAS kernel for the subtraction assignment of the vector/matrix product to y.
      // Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1> holds; the update is
      // delegated to a single cblas_dgemv call.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1414  using boost::numeric_cast;
1415 
1419 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
1420  const int M ( numeric_cast<int>( A.rows() ) );
1421  const int N ( numeric_cast<int>( A.columns() ) );
1422  const int lda( numeric_cast<int>( A.spacing() ) );
1423 
      // alpha = -1 and beta = 1 (gemv computes alpha*op(A)*x + beta*y), so the product
      // is subtracted from the current content of y.
1424  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1425  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
1426  }
1428 #endif
1429  //**********************************************************************************************
1430 
1431  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
1432 #if BLAZE_BLAS_MODE
1433 
1446  template< typename VT1 // Type of the left-hand side target vector
1447  , typename VT2 // Type of the left-hand side vector operand
1448  , typename MT1 > // Type of the right-hand side matrix operand
1449  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1450  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1451  {
      // BLAS kernel for the subtraction assignment of the vector/matrix product to y.
      // Enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; the
      // update is delegated to a single cblas_cgemv call.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1452  using boost::numeric_cast;
1453 
      // The value type wrapped by the complex elements of all three operands must be float.
1457  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
1458  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
1459  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
1460 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
1461  const int M ( numeric_cast<int>( A.rows() ) );
1462  const int N ( numeric_cast<int>( A.columns() ) );
1463  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = -1 and beta = 1 (gemv computes alpha*op(A)*x + beta*y), so the product
      // is subtracted from the current content of y.
1464  const complex<float> alpha( -1.0F, 0.0F );
1465  const complex<float> beta ( 1.0F, 0.0F );
1466 
      // The complex scaling factors are passed by address.
1467  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1468  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1469  }
1471 #endif
1472  //**********************************************************************************************
1473 
1474  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
1475 #if BLAZE_BLAS_MODE
1476 
1489  template< typename VT1 // Type of the left-hand side target vector
1490  , typename VT2 // Type of the left-hand side vector operand
1491  , typename MT1 > // Type of the right-hand side matrix operand
1492  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1493  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1494  {
      // BLAS kernel for the subtraction assignment of the vector/matrix product to y.
      // Enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; the
      // update is delegated to a single cblas_zgemv call.
      //   y  Target vector, updated in place.
      //   x  Left-hand side vector operand.
      //   A  Right-hand side matrix operand.
1495  using boost::numeric_cast;
1496 
      // The value type wrapped by the complex elements of all three operands must be double.
1500  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
1501  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
1502  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
1503 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
1504  const int M ( numeric_cast<int>( A.rows() ) );
1505  const int N ( numeric_cast<int>( A.columns() ) );
1506  const int lda( numeric_cast<int>( A.spacing() ) );
      // alpha = -1 and beta = 1 (gemv computes alpha*op(A)*x + beta*y), so the product
      // is subtracted from the current content of y.
1507  const complex<double> alpha( -1.0, 0.0 );
1508  const complex<double> beta ( 1.0, 0.0 );
1509 
      // The complex scaling factors are passed by address.
1510  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1511  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1512  }
1514 #endif
1515  //**********************************************************************************************
1516 
1517  //**Subtraction assignment to sparse vectors****************************************************
1518  // No special implementation for the subtraction assignment to sparse vectors.
1519  //**********************************************************************************************
1520 
1521  //**Multiplication assignment to dense vectors**************************************************
1534  template< typename VT1 > // Type of the target dense vector
1535  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1536  {
      // Multiplication assignment of the vector/matrix product held by rhs to the dense
      // vector lhs. There is no dedicated kernel: the expression is first evaluated
      // into a temporary of type ResultType, which is then multiplication-assigned.
      //   lhs  Target dense vector; its size must equal rhs.size().
      //   rhs  Multiplication expression to be multiplied into lhs.
1538 
1542 
1543  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1544 
1545  const ResultType tmp( rhs );
1546  multAssign( ~lhs, tmp );
1547  }
1549  //**********************************************************************************************
1550 
1551  //**Multiplication assignment to sparse vectors*************************************************
1552  // No special implementation for the multiplication assignment to sparse vectors.
1553  //**********************************************************************************************
1554 
1555  //**Compile time checks*************************************************************************
1562  //**********************************************************************************************
1563 };
1564 //*************************************************************************************************
1565 
1566 
1567 
1568 
1569 //=================================================================================================
1570 //
1571 // DVECSCALARMULTEXPR SPECIALIZATION
1572 //
1573 //=================================================================================================
1574 
1575 //*************************************************************************************************
1583 template< typename VT // Type of the left-hand side dense vector
1584  , typename MT // Type of the right-hand side dense matrix
1585  , typename ST > // Type of the side scalar value
1586 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
1587  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1588  , private VecScalarMultExpr
1589  , private Computation
1590 {
1591  private:
1592  //**Type definitions****************************************************************************
1593  typedef TDVecDMatMultExpr<VT,MT> VMM;  // Type of the wrapped vector/matrix multiplication expression
1594  typedef typename VMM::ResultType RES;  // Result type of the vector/matrix multiplication expression
1595  typedef typename VT::ResultType VRT;  // Result type of the vector operand
1596  typedef typename MT::ResultType MRT;  // Result type of the matrix operand
1597  typedef typename VRT::ElementType VET;  // Element type of the vector operand's result type
1598  typedef typename MRT::ElementType MET;  // Element type of the matrix operand's result type
1599  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand
1600  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand
1601  //**********************************************************************************************
1602 
1603  //**********************************************************************************************
      // Compile-time switch: nonzero when the vector operand type VT is a computation
      // (IsComputation). It is the selector used for the LT typedef of this class.
1605  enum { evaluateVector = IsComputation<VT>::value };
1606  //**********************************************************************************************
1607 
1608  //**********************************************************************************************
      // Compile-time switch: nonzero when the matrix operand type MT is a computation,
      // is not vectorizable, has the same element type as the vector operand, and that
      // element type is BLAS compatible. It is the selector used for the RT typedef of
      // this class and takes part in the kernel dispatch of assign().
1610  enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
1611  IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
1612  //**********************************************************************************************
1613 
1614  //**********************************************************************************************
1616 
1619  template< typename T1, typename T2, typename T3, typename T4 >
1620  struct UseSinglePrecisionKernel {
      // Selection trait for the single precision BLAS kernel (the cblas_sgemv based
      // selectBlasAssignKernel overload is enabled through it). value is nonzero when
      // T1, T2 and T3 are all vectorizable, all three element types are float
      // (IsFloat), and the scalar type T4 is not complex.
1621  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1622  IsFloat<typename T1::ElementType>::value &&
1623  IsFloat<typename T2::ElementType>::value &&
1624  IsFloat<typename T3::ElementType>::value &&
1625  !IsComplex<T4>::value };
1626  };
1627  //**********************************************************************************************
1628 
1629  //**********************************************************************************************
1631 
1634  template< typename T1, typename T2, typename T3, typename T4 >
1635  struct UseDoublePrecisionKernel {
      // Selection trait for the double precision BLAS kernel (the cblas_dgemv based
      // selectBlasAssignKernel overload is enabled through it). value is nonzero when
      // T1, T2 and T3 are all vectorizable, all three element types are double
      // (IsDouble), and the scalar type T4 is not complex.
1636  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1637  IsDouble<typename T1::ElementType>::value &&
1638  IsDouble<typename T2::ElementType>::value &&
1639  IsDouble<typename T3::ElementType>::value &&
1640  !IsComplex<T4>::value };
1641  };
1642  //**********************************************************************************************
1643 
1644  //**********************************************************************************************
1646 
1649  template< typename T1, typename T2, typename T3 >
1650  struct UseSinglePrecisionComplexKernel {
      // Selection trait for the single precision complex BLAS kernel (the cblas_cgemv
      // based selectBlasAssignKernel overload is enabled through it). value is nonzero
      // when T1, T2 and T3 are all vectorizable and each element type is exactly
      // complex<float>. Unlike the real-valued traits, the scalar type is not inspected.
1651  typedef complex<float> Type;
1652  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1653  IsSame<typename T1::ElementType,Type>::value &&
1654  IsSame<typename T2::ElementType,Type>::value &&
1655  IsSame<typename T3::ElementType,Type>::value };
1656  };
1657  //**********************************************************************************************
1658 
1659  //**********************************************************************************************
1661 
1664  template< typename T1, typename T2, typename T3 >
1665  struct UseDoublePrecisionComplexKernel {
      // Selection trait for the double precision complex BLAS kernel. value is nonzero
      // when T1, T2 and T3 are all vectorizable and each element type is exactly
      // complex<double>. Unlike the real-valued traits, the scalar type is not inspected.
1666  typedef complex<double> Type;
1667  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1668  IsSame<typename T1::ElementType,Type>::value &&
1669  IsSame<typename T2::ElementType,Type>::value &&
1670  IsSame<typename T3::ElementType,Type>::value };
1671  };
1672  //**********************************************************************************************
1673 
1674  //**********************************************************************************************
1676 
1678  template< typename T1, typename T2, typename T3, typename T4 >
1679  struct UseDefaultKernel {
      // Selection trait for the fallback of the BLAS kernel selection. value is nonzero
      // when BLAS mode is switched off (BLAZE_BLAS_MODE is zero) or when none of the
      // four BLAS selection traits (single/double precision, real/complex) applies.
1680  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1681  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1682  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1683  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1684  };
1685  //**********************************************************************************************
1686 
1687  //**********************************************************************************************
1689 
1692  template< typename T1, typename T2, typename T3, typename T4 >
1693  struct UseVectorizedDefaultKernel {
      // Selection trait choosing between the two selectDefaultAssignKernel overloads.
      // value is nonzero when T1, T2 and T3 are all vectorizable, the element types of
      // T1, T2 and T3 and the scalar type T4 are all the same type, and IntrinsicTrait
      // reports both addition and multiplication for that element type.
1694  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1695  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1696  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1697  IsSame<typename T1::ElementType,T4>::value &&
1698  IntrinsicTrait<typename T1::ElementType>::addition &&
1699  IntrinsicTrait<typename T1::ElementType>::multiplication };
1700  };
1701  //**********************************************************************************************
1702 
1703  public:
1704  //**Type definitions****************************************************************************
1705  typedef DVecScalarMultExpr<VMM,ST,true> This;  // Type of this DVecScalarMultExpr instance
1706  typedef typename MultTrait<RES,ST>::Type ResultType;  // Result type: MultTrait of the product's result type and the scalar type
1707  typedef typename ResultType::TransposeType TransposeType;  // Transpose type of the result type
1708  typedef typename ResultType::ElementType ElementType;  // Element type of the result type
1709  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  // Intrinsic type associated with the element type
1710  typedef const ElementType ReturnType;  // Return type of the subscript operator
1711  typedef const ResultType CompositeType;  // Composite type: the (const) evaluated result type
1712 
1714  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;  // Left-hand side operand: the vector/matrix multiplication expression
1715 
1717  typedef ST RightOperand;  // Right-hand side operand: the scalar
1718 
      // Type used to evaluate the vector operand; presumably const VRT when
      // evaluateVector is set and VCT otherwise -- confirm against SelectType.
1720  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
1721 
      // Type used to evaluate the matrix operand; presumably const MRT when
      // evaluateMatrix is set and MCT otherwise -- confirm against SelectType.
1723  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
1724  //**********************************************************************************************
1725 
1726  //**Compilation flags***************************************************************************
1728  enum { vectorizable = 0 };  // Compilation flag: fixed to 0 for this expression type
1729 
1731  enum { smpAssignable = 0 };  // Compilation flag: fixed to 0 for this expression type (presumably disables SMP assignment -- confirm)
1732  //**********************************************************************************************
1733 
1734  //**Constructor*********************************************************************************
1740  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
      // Constructs the scaled expression from the vector/matrix multiplication
      // expression 'vector' and the scaling factor 'scalar'; both are stored as members.
1741  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
1742  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
1743  {}
1744  //**********************************************************************************************
1745 
1746  //**Subscript operator**************************************************************************
1752  inline ReturnType operator[]( size_t index ) const {
      // Returns element 'index' of the wrapped multiplication expression multiplied by
      // the scalar. The index must be smaller than size() (checked by internal assert only).
1753  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
1754  return vector_[index] * scalar_;
1755  }
1756  //**********************************************************************************************
1757 
1758  //**Size function*******************************************************************************
1763  inline size_t size() const {
      // The size of the scaled expression is the size of the wrapped multiplication expression.
1764  return vector_.size();
1765  }
1766  //**********************************************************************************************
1767 
1768  //**Left operand access*************************************************************************
1773  inline LeftOperand leftOperand() const {
      // Returns the left-hand side operand, i.e. the wrapped vector/matrix multiplication expression.
1774  return vector_;
1775  }
1776  //**********************************************************************************************
1777 
1778  //**Right operand access************************************************************************
1783  inline RightOperand rightOperand() const {
      // Returns the right-hand side operand, i.e. the scalar factor.
1784  return scalar_;
1785  }
1786  //**********************************************************************************************
1787 
1788  //**********************************************************************************************
1794  template< typename T >
1795  inline bool canAlias( const T* alias ) const {
      // Alias query for the given address; the scalar takes no part in it, the answer
      // is whatever the wrapped multiplication expression reports.
1796  return vector_.canAlias( alias );
1797  }
1798  //**********************************************************************************************
1799 
1800  //**********************************************************************************************
1806  template< typename T >
1807  inline bool isAliased( const T* alias ) const {
      // Alias query for the given address; the scalar takes no part in it, the answer
      // is whatever the wrapped multiplication expression reports.
1808  return vector_.isAliased( alias );
1809  }
1810  //**********************************************************************************************
1811 
1812  private:
1813  //**Member variables****************************************************************************
1814  LeftOperand vector_;  // Left-hand side dense vector of the multiplication expression
1815  RightOperand scalar_;  // Right-hand side scalar of the multiplication expression
1816  //**********************************************************************************************
1817 
1818  //**Assignment to dense vectors*****************************************************************
1830  template< typename VT1 > // Type of the target dense vector
1831  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
1832  {
      // Assignment of the scaled vector/matrix product held by rhs to the dense vector
      // lhs. The operands of the wrapped multiplication are extracted and evaluated,
      // then either the default or the BLAS-based kernel is selected; the scalar is
      // passed on to the kernel.
      //   lhs  Target dense vector; its size must equal rhs.size().
      //   rhs  Scaled multiplication expression to be assigned.
1834 
1835  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1836 
1837  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1838  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1839 
      // A matrix without rows contributes nothing to any element: lhs is reset.
      // A matrix without columns leaves nothing to assign.
1840  if( right.rows() == 0UL ) {
1841  reset( ~lhs );
1842  return;
1843  }
1844  else if( right.columns() == 0UL ) {
1845  return;
1846  }
1847 
1848  LT x( left ); // Evaluation of the left-hand side dense vector operand
1849  RT A( right ); // Evaluation of the right-hand side dense matrix operand
1850 
1851  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
1852  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
1853  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
1854  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1855 
      // The default kernel is used when the matrix operand is a computation that was not
      // evaluated, or when the matrix has fewer than TDVECDMATMULT_THRESHOLD elements;
      // otherwise the BLAS kernel selection takes over.
1856  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1857  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1858  DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1859  else
1860  DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
1861  }
1862  //**********************************************************************************************
1863 
1864  //**Default assignment to dense vectors*********************************************************
1878  template< typename VT1 // Type of the left-hand side target vector
1879  , typename VT2 // Type of the left-hand side vector operand
1880  , typename MT1 // Type of the right-hand side matrix operand
1881  , typename ST2 > // Type of the scalar value
1882  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1883  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1884  {
      // Non-vectorized default kernel for the assignment of the scaled product: y is
      // first filled with the unscaled product and scaled by 'scalar' in a final pass.
      // Enabled only when UseVectorizedDefaultKernel is NOT satisfied.
      //   y       Target vector, overwritten.
      //   x       Left-hand side vector operand.
      //   A       Right-hand side matrix operand; row 0 is accessed unconditionally,
      //           so A must have at least one row (assign() returns early otherwise).
      //   scalar  Scaling factor.
1885  const size_t M( A.rows() );
1886  const size_t N( A.columns() );
1887 
      // jend is N rounded down to the next even number (lowest bit masked out).
1888  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1889  const size_t jend( N & size_t(-2) );
1890 
      // The first matrix row initializes y, so no separate reset of y is needed.
1891  for( size_t j=0UL; j<N; ++j ) {
1892  y[j] = x[0UL] * A(0UL,j);
1893  }
      // The remaining rows are accumulated; the column loop is unrolled by two.
1894  for( size_t i=1UL; i<M; ++i ) {
1895  for( size_t j=0UL; j<jend; j+=2UL ) {
1896  y[j ] += x[i] * A(i,j );
1897  y[j+1UL] += x[i] * A(i,j+1UL);
1898  }
      // Remaining last column in case N is odd.
1899  if( jend < N ) {
1900  y[jend] += x[i] * A(i,jend);
1901  }
1902  }
      // Final pass: apply the scalar to every element of the result.
1903  for( size_t j=0UL; j<N; ++j ) {
1904  y[j] *= scalar;
1905  }
1906  }
1907  //**********************************************************************************************
1908 
1909  //**Vectorized default assignment to dense vectors**********************************************
1923  template< typename VT1 // Type of the left-hand side target vector
1924  , typename VT2 // Type of the left-hand side vector operand
1925  , typename MT1 // Type of the right-hand side matrix operand
1926  , typename ST2 > // Type of the scalar value
1927  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1928  selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
1929  {
      // Vectorized default kernel for the assignment of the scaled product. The columns
      // are processed in chunks of 8, 4, 3, 2 and finally 1 IntrinsicType values; for
      // each chunk x[i] * A(i,j..) is accumulated over all rows i and the sum is
      // multiplied by the scalar when it is stored to y.
      //   y       Target vector, overwritten.
      //   x       Left-hand side vector operand.
      //   A       Right-hand side matrix operand.
      //   scalar  Scaling factor.
      // NOTE(review): the accumulators are default-constructed and immediately used as
      // summands; this presumably relies on IntrinsicType's default constructor
      // zero-initializing its value -- confirm.
      // NOTE(review): each loop condition only checks that the START of the last
      // IntrinsicType of the chunk is below N, so loads/stores can reach beyond N.
      // This presumably relies on y and A being padded to a multiple of IT::size --
      // confirm against the storage classes.
1930  typedef IntrinsicTrait<ElementType> IT;
1931 
1932  const size_t M( A.rows() );
1933  const size_t N( A.columns() );
1934 
      // The scalar is expanded into an IntrinsicType once and reused for every store.
1935  const IntrinsicType factor( set( scalar ) );
1936 
      // Column index shared by all of the following loops; each loop continues where
      // the previous one stopped.
1937  size_t j( 0UL );
1938 
      // Chunks of 8 IntrinsicType values.
1939  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1940  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1941  for( size_t i=0UL; i<M; ++i ) {
1942  const IntrinsicType x1( set( x[i] ) );
1943  xmm1 = xmm1 + x1 * A.load(i,j );
1944  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1945  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1946  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1947  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1948  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1949  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1950  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1951  }
1952  y.store( j , xmm1*factor );
1953  y.store( j+IT::size , xmm2*factor );
1954  y.store( j+IT::size*2UL, xmm3*factor );
1955  y.store( j+IT::size*3UL, xmm4*factor );
1956  y.store( j+IT::size*4UL, xmm5*factor );
1957  y.store( j+IT::size*5UL, xmm6*factor );
1958  y.store( j+IT::size*6UL, xmm7*factor );
1959  y.store( j+IT::size*7UL, xmm8*factor );
1960  }
      // Chunks of 4 IntrinsicType values.
1961  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1962  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1963  for( size_t i=0UL; i<M; ++i ) {
1964  const IntrinsicType x1( set( x[i] ) );
1965  xmm1 = xmm1 + x1 * A.load(i,j );
1966  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1967  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1968  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1969  }
1970  y.store( j , xmm1*factor );
1971  y.store( j+IT::size , xmm2*factor );
1972  y.store( j+IT::size*2UL, xmm3*factor );
1973  y.store( j+IT::size*3UL, xmm4*factor );
1974  }
      // Chunks of 3 IntrinsicType values.
1975  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1976  IntrinsicType xmm1, xmm2, xmm3;
1977  for( size_t i=0UL; i<M; ++i ) {
1978  const IntrinsicType x1( set( x[i] ) );
1979  xmm1 = xmm1 + x1 * A.load(i,j );
1980  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1981  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1982  }
1983  y.store( j , xmm1*factor );
1984  y.store( j+IT::size , xmm2*factor );
1985  y.store( j+IT::size*2UL, xmm3*factor );
1986  }
      // Chunks of 2 IntrinsicType values.
1987  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1988  IntrinsicType xmm1, xmm2;
1989  for( size_t i=0UL; i<M; ++i ) {
1990  const IntrinsicType x1( set( x[i] ) );
1991  xmm1 = xmm1 + x1 * A.load(i,j );
1992  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1993  }
1994  y.store( j , xmm1*factor );
1995  y.store( j+IT::size, xmm2*factor );
1996  }
      // At most one IntrinsicType is left at this point.
1997  if( j < N ) {
1998  IntrinsicType xmm1;
1999  for( size_t i=0UL; i<M; ++i ) {
2000  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2001  }
2002  y.store( j, xmm1*factor );
2003  }
2004  }
2005  //**********************************************************************************************
2006 
2007  //**BLAS-based assignment to dense vectors (default)********************************************
2020  template< typename VT1 // Type of the left-hand side target vector
2021  , typename VT2 // Type of the left-hand side vector operand
2022  , typename MT1 // Type of the right-hand side matrix operand
2023  , typename ST2 > // Type of the scalar value
2024  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2025  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2026  {
      // Fallback of the BLAS kernel selection: enabled when
      // UseDefaultKernel<VT1,VT2,MT1,ST2> holds, in which case the call is simply
      // forwarded to the default assignment kernel.
2027  selectDefaultAssignKernel( y, x, A, scalar );
2028  }
2029  //**********************************************************************************************
2030 
2031  //**BLAS-based assignment to dense vectors (single precision)***********************************
2032 #if BLAZE_BLAS_MODE
2033 
2046  template< typename VT1 // Type of the left-hand side target vector
2047  , typename VT2 // Type of the left-hand side vector operand
2048  , typename MT1 // Type of the right-hand side matrix operand
2049  , typename ST2 > // Type of the scalar value
2050  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2051  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2052  {
      // BLAS kernel for the assignment of the scaled vector/matrix product to y.
      // Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds; the whole
      // operation, including the scaling, is delegated to a single cblas_sgemv call.
      //   y       Target vector, overwritten.
      //   x       Left-hand side vector operand.
      //   A       Right-hand side matrix operand.
      //   scalar  Scaling factor, passed to BLAS as alpha.
2053  using boost::numeric_cast;
2054 
2058 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
2059  const int M ( numeric_cast<int>( A.rows() ) );
2060  const int N ( numeric_cast<int>( A.columns() ) );
2061  const int lda( numeric_cast<int>( A.spacing() ) );
2062 
      // alpha = scalar and beta = 0 (gemv computes alpha*op(A)*x + beta*y), so the
      // previous content of y does not contribute to the result.
2063  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2064  A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
2065  }
2066 #endif
2067  //**********************************************************************************************
2068 
2069  //**BLAS-based assignment to dense vectors (double precision)***********************************
2070 #if BLAZE_BLAS_MODE
2071 
2084  template< typename VT1 // Type of the left-hand side target vector
2085  , typename VT2 // Type of the left-hand side vector operand
2086  , typename MT1 // Type of the right-hand side matrix operand
2087  , typename ST2 > // Type of the scalar value
2088  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2089  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2090  {
      // BLAS kernel for the assignment of the scaled vector/matrix product to y.
      // Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds; the whole
      // operation, including the scaling, is delegated to a single cblas_dgemv call.
      //   y       Target vector, overwritten.
      //   x       Left-hand side vector operand.
      //   A       Right-hand side matrix operand.
      //   scalar  Scaling factor, passed to BLAS as alpha.
2091  using boost::numeric_cast;
2092 
2096 
      // The sizes are converted to int via numeric_cast before being handed to CBLAS;
      // the leading dimension is taken from A.spacing().
2097  const int M ( numeric_cast<int>( A.rows() ) );
2098  const int N ( numeric_cast<int>( A.columns() ) );
2099  const int lda( numeric_cast<int>( A.spacing() ) );
2100 
      // alpha = scalar and beta = 0 (gemv computes alpha*op(A)*x + beta*y), so the
      // previous content of y does not contribute to the result.
2101  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2102  A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
2103  }
2104 #endif
2105  //**********************************************************************************************
2106 
2107  //**BLAS-based assignment to dense vectors (single precision complex)***************************
2108 #if BLAZE_BLAS_MODE
2109 
2122  template< typename VT1 // Type of the left-hand side target vector
2123  , typename VT2 // Type of the left-hand side vector operand
2124  , typename MT1 // Type of the right-hand side matrix operand
2125  , typename ST2 > // Type of the scalar value
2126  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2127  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2128  {
2129  using boost::numeric_cast;
2130 
2134  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2135  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2136  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2137 
2138  const int M ( numeric_cast<int>( A.rows() ) );
2139  const int N ( numeric_cast<int>( A.columns() ) );
2140  const int lda( numeric_cast<int>( A.spacing() ) );
2141  const complex<float> alpha( scalar );
2142  const complex<float> beta ( 0.0F, 0.0F );
2143 
2144  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2145  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2146  }
2147 #endif
2148  //**********************************************************************************************
2149 
2150  //**BLAS-based assignment to dense vectors (double precision complex)***************************
2151 #if BLAZE_BLAS_MODE
2152 
2165  template< typename VT1 // Type of the left-hand side target vector
2166  , typename VT2 // Type of the left-hand side vector operand
2167  , typename MT1 // Type of the right-hand side matrix operand
2168  , typename ST2 > // Type of the scalar value
2169  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2170  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2171  {
2172  using boost::numeric_cast;
2173 
2177  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2178  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2179  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2180 
2181  const int M ( numeric_cast<int>( A.rows() ) );
2182  const int N ( numeric_cast<int>( A.columns() ) );
2183  const int lda( numeric_cast<int>( A.spacing() ) );
2184  const complex<double> alpha( scalar );
2185  const complex<double> beta ( 0.0, 0.0 );
2186 
2187  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2188  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2189  }
2190 #endif
2191  //**********************************************************************************************
2192 
2193  //**Assignment to sparse vectors****************************************************************
2205  template< typename VT1 > // Type of the target sparse vector
2206  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2207  {
2209 
2213 
2214  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2215 
2216  const ResultType tmp( rhs );
2217  assign( ~lhs, tmp );
2218  }
2219  //**********************************************************************************************
2220 
2221  //**Addition assignment to dense vectors********************************************************
2233  template< typename VT1 > // Type of the target dense vector
2234  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2235  {
2237 
2238  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2239 
2240  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2241  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2242 
2243  if( right.rows() == 0UL || right.columns() == 0UL ) {
2244  return;
2245  }
2246 
2247  LT x( left ); // Evaluation of the left-hand side dense vector operand
2248  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2249 
2250  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2251  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2252  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2253  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2254 
2255  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2256  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2257  DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2258  else
2259  DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2260  }
2261  //**********************************************************************************************
2262 
2263  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel for the scaled vector/matrix
// multiplication. This overload participates only when UseVectorizedDefaultKernel does not
// hold for the given types (DisableIf).
//
//  y      : target vector that receives the addition
//  x      : left-hand side vector operand
//  A      : right-hand side matrix operand
//  scalar : scaling factor
2277  template< typename VT1 // Type of the left-hand side target vector
2278  , typename VT2 // Type of the left-hand side vector operand
2279  , typename MT1 // Type of the right-hand side matrix operand
2280  , typename ST2 > // Type of the scalar value
2281  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2282  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2283  {
// The scaled product is built as one expression and handed to the target's own addAssign().
2284  y.addAssign( x * A * scalar );
2285  }
2286  //**********************************************************************************************
2287 
2288  //**Vectorized default addition assignment to dense vectors*************************************
// Vectorized default addition assignment kernel for the scaled vector/matrix multiplication.
// This overload participates only when UseVectorizedDefaultKernel holds for the given types.
//
// The columns of A are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements each. For every block the kernel runs over all M rows, accumulates
// set(x[i]) * A.load(i,column) per intrinsic vector, and afterwards adds the accumulated
// values, multiplied by the broadcast scalar, to the matching section of y.
//
//  y      : target vector, read and written through load()/store()
//  x      : left-hand side vector operand, read element-wise
//  A      : right-hand side matrix operand, read through load(i,j)
//  scalar : scaling factor
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel relies on
// the default state of IntrinsicType being zero -- confirm against the IntrinsicType definition.
// NOTE(review): the last block loads and stores a full intrinsic vector whenever j < N, which
// presumably relies on padded storage in y and A -- confirm.
2302  template< typename VT1 // Type of the left-hand side target vector
2303  , typename VT2 // Type of the left-hand side vector operand
2304  , typename MT1 // Type of the right-hand side matrix operand
2305  , typename ST2 > // Type of the scalar value
2306  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2307  selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2308  {
2309  typedef IntrinsicTrait<ElementType> IT;
2310 
2311  const size_t M( A.rows() );
2312  const size_t N( A.columns() );
2313 
// Scalar broadcast into an intrinsic vector; applied once per block after the accumulation.
2314  const IntrinsicType factor( set( scalar ) );
2315 
// Column index shared by all of the following loops; each loop continues where the previous
// one stopped.
2316  size_t j( 0UL );
2317 
// Blocks of eight intrinsic vectors: entered while the start of the eighth vector is below N.
2318  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2319  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2320  for( size_t i=0UL; i<M; ++i ) {
2321  const IntrinsicType x1( set( x[i] ) );
2322  xmm1 = xmm1 + x1 * A.load(i,j );
2323  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2324  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2325  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2326  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2327  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2328  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2329  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2330  }
2331  y.store( j , y.load(j ) + xmm1*factor );
2332  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2333  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2334  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2335  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
2336  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
2337  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
2338  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
2339  }
// Blocks of four intrinsic vectors.
2340  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2341  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2342  for( size_t i=0UL; i<M; ++i ) {
2343  const IntrinsicType x1( set( x[i] ) );
2344  xmm1 = xmm1 + x1 * A.load(i,j );
2345  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2346  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2347  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2348  }
2349  y.store( j , y.load(j ) + xmm1*factor );
2350  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2351  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2352  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2353  }
// Blocks of three intrinsic vectors.
2354  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2355  IntrinsicType xmm1, xmm2, xmm3;
2356  for( size_t i=0UL; i<M; ++i ) {
2357  const IntrinsicType x1( set( x[i] ) );
2358  xmm1 = xmm1 + x1 * A.load(i,j );
2359  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2360  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2361  }
2362  y.store( j , y.load(j ) + xmm1*factor );
2363  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2364  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2365  }
// Blocks of two intrinsic vectors.
2366  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2367  IntrinsicType xmm1, xmm2;
2368  for( size_t i=0UL; i<M; ++i ) {
2369  const IntrinsicType x1( set( x[i] ) );
2370  xmm1 = xmm1 + x1 * A.load(i,j );
2371  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2372  }
2373  y.store( j , y.load(j ) + xmm1*factor );
2374  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
2375  }
// Remaining columns: at most one further intrinsic vector is processed.
2376  if( j < N ) {
2377  IntrinsicType xmm1;
2378  for( size_t i=0UL; i<M; ++i ) {
2379  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2380  }
2381  y.store( j, y.load(j) + xmm1*factor );
2382  }
2383  }
2384  //**********************************************************************************************
2385 
2386  //**BLAS-based addition assignment to dense vectors (default)***********************************
2400  template< typename VT1 // Type of the left-hand side target vector
2401  , typename VT2 // Type of the left-hand side vector operand
2402  , typename MT1 // Type of the right-hand side matrix operand
2403  , typename ST2 > // Type of the scalar value
2404  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2405  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2406  {
2407  selectDefaultAddAssignKernel( y, x, A, scalar );
2408  }
2409  //**********************************************************************************************
2410 
2411  //**BLAS-based addition assignment to dense vectors (single precision)**************************
2412 #if BLAZE_BLAS_MODE
2413 
2426  template< typename VT1 // Type of the left-hand side target vector
2427  , typename VT2 // Type of the left-hand side vector operand
2428  , typename MT1 // Type of the right-hand side matrix operand
2429  , typename ST2 > // Type of the scalar value
2430  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2431  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2432  {
2433  using boost::numeric_cast;
2434 
2438 
2439  const int M ( numeric_cast<int>( A.rows() ) );
2440  const int N ( numeric_cast<int>( A.columns() ) );
2441  const int lda( numeric_cast<int>( A.spacing() ) );
2442 
2443  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2444  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2445  }
2446 #endif
2447  //**********************************************************************************************
2448 
2449  //**BLAS-based addition assignment to dense vectors (double precision)**************************
2450 #if BLAZE_BLAS_MODE
2451 
2464  template< typename VT1 // Type of the left-hand side target vector
2465  , typename VT2 // Type of the left-hand side vector operand
2466  , typename MT1 // Type of the right-hand side matrix operand
2467  , typename ST2 > // Type of the scalar value
2468  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2469  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2470  {
2471  using boost::numeric_cast;
2472 
2476 
2477  const int M ( numeric_cast<int>( A.rows() ) );
2478  const int N ( numeric_cast<int>( A.columns() ) );
2479  const int lda( numeric_cast<int>( A.spacing() ) );
2480 
2481  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2482  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2483  }
2484 #endif
2485  //**********************************************************************************************
2486 
2487  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
2488 #if BLAZE_BLAS_MODE
2489 
2502  template< typename VT1 // Type of the left-hand side target vector
2503  , typename VT2 // Type of the left-hand side vector operand
2504  , typename MT1 // Type of the right-hand side matrix operand
2505  , typename ST2 > // Type of the scalar value
2506  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2507  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2508  {
2509  using boost::numeric_cast;
2510 
2514  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2515  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2516  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2517 
2518  const int M ( numeric_cast<int>( A.rows() ) );
2519  const int N ( numeric_cast<int>( A.columns() ) );
2520  const int lda( numeric_cast<int>( A.spacing() ) );
2521  const complex<float> alpha( scalar );
2522  const complex<float> beta ( 1.0F, 0.0F );
2523 
2524  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2525  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2526  }
2527 #endif
2528  //**********************************************************************************************
2529 
2530  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
2531 #if BLAZE_BLAS_MODE
2532 
2545  template< typename VT1 // Type of the left-hand side target vector
2546  , typename VT2 // Type of the left-hand side vector operand
2547  , typename MT1 // Type of the right-hand side matrix operand
2548  , typename ST2 > // Type of the scalar value
2549  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2550  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2551  {
2552  using boost::numeric_cast;
2553 
2557  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2558  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2559  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2560 
2561  const int M ( numeric_cast<int>( A.rows() ) );
2562  const int N ( numeric_cast<int>( A.columns() ) );
2563  const int lda( numeric_cast<int>( A.spacing() ) );
2564  const complex<double> alpha( scalar );
2565  const complex<double> beta ( 1.0, 0.0 );
2566 
2567  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2568  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2569  }
2570 #endif
2571  //**********************************************************************************************
2572 
2573  //**Addition assignment to sparse vectors*******************************************************
2574  // No special implementation for the addition assignment to sparse vectors.
2575  //**********************************************************************************************
2576 
2577  //**Subtraction assignment to dense vectors*****************************************************
2589  template< typename VT1 > // Type of the target dense vector
2590  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2591  {
2593 
2594  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2595 
2596  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2597  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2598 
2599  if( right.rows() == 0UL || right.columns() == 0UL ) {
2600  return;
2601  }
2602 
2603  LT x( left ); // Evaluation of the left-hand side dense vector operand
2604  RT A( right ); // Evaluation of the right-hand side dense matrix operand
2605 
2606  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2607  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2608  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2609  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2610 
2611  if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2612  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2613  DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2614  else
2615  DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2616  }
2617  //**********************************************************************************************
2618 
2619  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel for the scaled vector/matrix
// multiplication. This overload participates only when UseVectorizedDefaultKernel does not
// hold for the given types (DisableIf).
//
//  y      : target vector that receives the subtraction
//  x      : left-hand side vector operand
//  A      : right-hand side matrix operand
//  scalar : scaling factor
2633  template< typename VT1 // Type of the left-hand side target vector
2634  , typename VT2 // Type of the left-hand side vector operand
2635  , typename MT1 // Type of the right-hand side matrix operand
2636  , typename ST2 > // Type of the scalar value
2637  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2638  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2639  {
// The scaled product is built as one expression and handed to the target's own subAssign().
2640  y.subAssign( x * A * scalar );
2641  }
2642  //**********************************************************************************************
2643 
2644  //**Vectorized default subtraction assignment to dense vectors**********************************
// Vectorized default subtraction assignment kernel for the scaled vector/matrix multiplication.
// This overload participates only when UseVectorizedDefaultKernel holds for the given types.
//
// The columns of A are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements each. For every block the kernel runs over all M rows, accumulates
// set(x[i]) * A.load(i,column) per intrinsic vector, and afterwards subtracts the accumulated
// values, multiplied by the broadcast scalar, from the matching section of y.
//
//  y      : target vector, read and written through load()/store()
//  x      : left-hand side vector operand, read element-wise
//  A      : right-hand side matrix operand, read through load(i,j)
//  scalar : scaling factor
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel relies on
// the default state of IntrinsicType being zero -- confirm against the IntrinsicType definition.
// NOTE(review): the last block loads and stores a full intrinsic vector whenever j < N, which
// presumably relies on padded storage in y and A -- confirm.
2658  template< typename VT1 // Type of the left-hand side target vector
2659  , typename VT2 // Type of the left-hand side vector operand
2660  , typename MT1 // Type of the right-hand side matrix operand
2661  , typename ST2 > // Type of the scalar value
2662  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2663  selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2664  {
2665  typedef IntrinsicTrait<ElementType> IT;
2666 
2667  const size_t M( A.rows() );
2668  const size_t N( A.columns() );
2669 
// Scalar broadcast into an intrinsic vector; applied once per block after the accumulation.
2670  const IntrinsicType factor( set( scalar ) );
2671 
// Column index shared by all of the following loops; each loop continues where the previous
// one stopped.
2672  size_t j( 0UL );
2673 
// Blocks of eight intrinsic vectors: entered while the start of the eighth vector is below N.
2674  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2675  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2676  for( size_t i=0UL; i<M; ++i ) {
2677  const IntrinsicType x1( set( x[i] ) );
2678  xmm1 = xmm1 + x1 * A.load(i,j );
2679  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2680  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2681  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2682  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2683  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2684  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2685  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2686  }
2687  y.store( j , y.load(j ) - xmm1*factor );
2688  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
2689  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
2690  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
2691  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
2692  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
2693  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
2694  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
2695  }
// Blocks of four intrinsic vectors.
2696  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2697  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2698  for( size_t i=0UL; i<M; ++i ) {
2699  const IntrinsicType x1( set( x[i] ) );
2700  xmm1 = xmm1 + x1 * A.load(i,j );
2701  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2702  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2703  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2704  }
2705  y.store( j , y.load(j ) - xmm1*factor );
2706  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
2707  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
2708  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
2709  }
// Blocks of three intrinsic vectors.
2710  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2711  IntrinsicType xmm1, xmm2, xmm3;
2712  for( size_t i=0UL; i<M; ++i ) {
2713  const IntrinsicType x1( set( x[i] ) );
2714  xmm1 = xmm1 + x1 * A.load(i,j );
2715  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2716  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2717  }
2718  y.store( j , y.load(j ) - xmm1*factor );
2719  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
2720  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
2721  }
// Blocks of two intrinsic vectors.
2722  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2723  IntrinsicType xmm1, xmm2;
2724  for( size_t i=0UL; i<M; ++i ) {
2725  const IntrinsicType x1( set( x[i] ) );
2726  xmm1 = xmm1 + x1 * A.load(i,j );
2727  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2728  }
2729  y.store( j , y.load(j ) - xmm1*factor );
2730  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
2731  }
// Remaining columns: at most one further intrinsic vector is processed.
2732  if( j < N ) {
2733  IntrinsicType xmm1;
2734  for( size_t i=0UL; i<M; ++i ) {
2735  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2736  }
2737  y.store( j, y.load(j) - xmm1*factor );
2738  }
2739  }
2740  //**********************************************************************************************
2741 
2742  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2756  template< typename VT1 // Type of the left-hand side target vector
2757  , typename VT2 // Type of the left-hand side vector operand
2758  , typename MT1 // Type of the right-hand side matrix operand
2759  , typename ST2 > // Type of the scalar value
2760  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2761  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2762  {
2763  selectDefaultSubAssignKernel( y, x, A, scalar );
2764  }
2765  //**********************************************************************************************
2766 
2767  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2768 #if BLAZE_BLAS_MODE
2769 
2782  template< typename VT1 // Type of the left-hand side target vector
2783  , typename VT2 // Type of the left-hand side vector operand
2784  , typename MT1 // Type of the right-hand side matrix operand
2785  , typename ST2 > // Type of the scalar value
2786  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2787  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2788  {
2789  using boost::numeric_cast;
2790 
2794 
2795  const int M ( numeric_cast<int>( A.rows() ) );
2796  const int N ( numeric_cast<int>( A.columns() ) );
2797  const int lda( numeric_cast<int>( A.spacing() ) );
2798 
2799  cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2800  A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2801  }
2802 #endif
2803  //**********************************************************************************************
2804 
2805  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2806 #if BLAZE_BLAS_MODE
2807 
2820  template< typename VT1 // Type of the left-hand side target vector
2821  , typename VT2 // Type of the left-hand side vector operand
2822  , typename MT1 // Type of the right-hand side matrix operand
2823  , typename ST2 > // Type of the scalar value
2824  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2825  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2826  {
2827  using boost::numeric_cast;
2828 
2832 
2833  const int M ( numeric_cast<int>( A.rows() ) );
2834  const int N ( numeric_cast<int>( A.columns() ) );
2835  const int lda( numeric_cast<int>( A.spacing() ) );
2836 
2837  cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
2838  A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2839  }
2840 #endif
2841  //**********************************************************************************************
2842 
2843  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2844 #if BLAZE_BLAS_MODE
2845 
2858  template< typename VT1 // Type of the left-hand side target vector
2859  , typename VT2 // Type of the left-hand side vector operand
2860  , typename MT1 // Type of the right-hand side matrix operand
2861  , typename ST2 > // Type of the scalar value
2862  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2863  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2864  {
2865  using boost::numeric_cast;
2866 
2870  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT1::ElementType::value_type );
2871  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename VT2::ElementType::value_type );
2872  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT1::ElementType::value_type );
2873 
2874  const int M ( numeric_cast<int>( A.rows() ) );
2875  const int N ( numeric_cast<int>( A.columns() ) );
2876  const int lda( numeric_cast<int>( A.spacing() ) );
2877  const complex<float> alpha( -scalar );
2878  const complex<float> beta ( 1.0F, 0.0F );
2879 
2880  cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2881  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2882  }
2883 #endif
2884  //**********************************************************************************************
2885 
2886  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2887 #if BLAZE_BLAS_MODE
2888 
2901  template< typename VT1 // Type of the left-hand side target vector
2902  , typename VT2 // Type of the left-hand side vector operand
2903  , typename MT1 // Type of the right-hand side matrix operand
2904  , typename ST2 > // Type of the scalar value
2905  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2906  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2907  {
2908  using boost::numeric_cast;
2909 
2913  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT1::ElementType::value_type );
2914  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename VT2::ElementType::value_type );
2915  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT1::ElementType::value_type );
2916 
2917  const int M ( numeric_cast<int>( A.rows() ) );
2918  const int N ( numeric_cast<int>( A.columns() ) );
2919  const int lda( numeric_cast<int>( A.spacing() ) );
2920  const complex<double> alpha( -scalar );
2921  const complex<double> beta ( 1.0, 0.0 );
2922 
2923  cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2924  A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2925  }
2926 #endif
2927  //**********************************************************************************************
2928 
2929  //**Subtraction assignment to sparse vectors****************************************************
2930  // No special implementation for the subtraction assignment to sparse vectors.
2931  //**********************************************************************************************
2932 
2933  //**Multiplication assignment to dense vectors**************************************************
2945  template< typename VT1 > // Type of the target dense vector
2946  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2947  {
2949 
2953 
2954  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2955 
2956  const ResultType tmp( rhs );
2957  multAssign( ~lhs, tmp );
2958  }
2959  //**********************************************************************************************
2960 
2961  //**Multiplication assignment to sparse vectors*************************************************
2962  // No special implementation for the multiplication assignment to sparse vectors.
2963  //**********************************************************************************************
2964 
2965  //**Compile time checks*************************************************************************
2974  //**********************************************************************************************
2975 };
2977 //*************************************************************************************************
2978 
2979 
2980 
2981 
2982 //=================================================================================================
2983 //
2984 // GLOBAL BINARY ARITHMETIC OPERATORS
2985 //
2986 //=================================================================================================
2987 
2988 //*************************************************************************************************
// Multiplication of a dense vector with a dense matrix (see the template parameter comments).
//
// The overload is disabled when T2 is a matrix/matrix multiplication expression
// (DisableIf< IsMatMatMultExpr<T2>, ... >). Otherwise it returns a TDVecDMatMultExpr<T1,T2>
// expression object built from the two operands.
//
// Throws std::invalid_argument when the size of the vector differs from the number of rows of
// the matrix.
//
// NOTE(review): the embedded listing numbers jump from 3021 to 3023 and `vec`/`mat` are used
// below without a visible declaration -- the declarator line (operator name and parameter list)
// appears to have been lost from this listing and must be restored from the original header.
3019 template< typename T1 // Type of the left-hand side dense vector
3020  , typename T2 > // Type of the right-hand side dense matrix
3021 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
3023 {
3025 
// The vector needs one element per matrix row.
3026  if( (~vec).size() != (~mat).rows() )
3027  throw std::invalid_argument( "Vector and matrix sizes do not match" );
3028 
3029  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
3030 }
3031 //*************************************************************************************************
3032 
3033 
3034 
3035 
3036 //=================================================================================================
3037 //
3038 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
3039 //
3040 //=================================================================================================
3041 
3042 //*************************************************************************************************
// Restructuring multiplication of a dense vector with a matrix/matrix multiplication
// expression.
//
// The overload is enabled only when T2 is a matrix/matrix multiplication expression
// (EnableIf< IsMatMatMultExpr<T2>, ... >). Instead of multiplying the vector with the matrix
// product, the vector is first multiplied with the left operand of that product and the result
// is then multiplied with its right operand.
//
// NOTE(review): the embedded listing numbers jump from 3058 to 3060 and `vec`/`mat` are used
// below without a visible declaration -- the declarator line (operator name and parameter list)
// appears to have been lost from this listing and must be restored from the original header.
3055 template< typename T1 // Type of the left-hand side dense vector
3056  , typename T2 // Type of the right-hand side dense matrix
3057  , bool SO > // Storage order of the right-hand side dense matrix
3058 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3060 {
3062 
// Regrouping: vec * (left * right) is evaluated as (vec * left) * right.
3063  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
3064 }
3065 //*************************************************************************************************
3066 
3067 
3068 
3069 
3070 //=================================================================================================
3071 //
3072 // EXPRESSION TRAIT SPECIALIZATIONS
3073 //
3074 //=================================================================================================
3075 
3076 //*************************************************************************************************
3078 template< typename VT, typename MT >
3079 struct SubvectorExprTrait< TDVecDMatMultExpr<VT,MT> >
3080 {
3081  public:
3082  //**********************************************************************************************
3083  typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT>::Type >::Type Type;
3084  //**********************************************************************************************
3085 };
3087 //*************************************************************************************************
3088 
3089 } // namespace blaze
3090 
3091 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:110
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:3703
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:233
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:114
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:325
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:278
Header file for the VecScalarMultExpr base class.
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:243
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:228
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:237
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:250
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:230
Header file for the multiplication trait.
Header file for the IsDouble type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:232
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:337
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:263
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Constraint on the data type.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:349
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:305
Header file for the IsNumeric type trait.
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:109
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:111
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:240
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:315
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:112
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Header file for the TVecMatMultExpr base class.
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:357
Expression object for transpose dense vector-dense matrix multiplications. The TDVecDMatMultExpr class...
Definition: Forward.h:129
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:231
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:246
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
Header file for all intrinsic functionality.
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecDMatMultExpr.h:358
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:85
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:113
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:234
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:356
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:138
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:229
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.