TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level2.h>
56 #include <blaze/math/Functions.h>
57 #include <blaze/math/Intrinsics.h>
58 #include <blaze/math/shims/Reset.h>
79 #include <blaze/system/BLAS.h>
81 #include <blaze/util/Assert.h>
82 #include <blaze/util/Complex.h>
85 #include <blaze/util/DisableIf.h>
86 #include <blaze/util/EnableIf.h>
88 #include <blaze/util/SelectType.h>
89 #include <blaze/util/Types.h>
95 
96 
97 namespace blaze {
98 
99 //=================================================================================================
100 //
101 // CLASS TDVECDMATMULTEXPR
102 //
103 //=================================================================================================
104 
105 //*************************************************************************************************
112 template< typename VT // Type of the left-hand side dense vector
113  , typename MT > // Type of the right-hand side dense matrix
114 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
115  , private TVecMatMultExpr
116  , private Computation
117 {
118  private:
119  //**Type definitions****************************************************************************
120  typedef typename VT::ResultType VRT;
121  typedef typename MT::ResultType MRT;
122  typedef typename VRT::ElementType VET;
123  typedef typename MRT::ElementType MET;
124  typedef typename VT::CompositeType VCT;
125  typedef typename MT::CompositeType MCT;
126  //**********************************************************************************************
127 
128  //**********************************************************************************************
130  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
131  //**********************************************************************************************
132 
133  //**********************************************************************************************
135  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
145  template< typename T1 >
146  struct UseSMPAssign {
147  enum { value = ( evaluateVector || evaluateMatrix ) };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
158  template< typename T1, typename T2, typename T3 >
159  struct UseSinglePrecisionKernel {
160  enum { value = BLAZE_BLAS_MODE &&
161  HasMutableDataAccess<T1>::value &&
162  HasConstDataAccess<T2>::value &&
163  HasConstDataAccess<T3>::value &&
164  !IsDiagonal<T3>::value &&
165  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
166  IsFloat<typename T1::ElementType>::value &&
167  IsFloat<typename T2::ElementType>::value &&
168  IsFloat<typename T3::ElementType>::value };
169  };
171  //**********************************************************************************************
172 
173  //**********************************************************************************************
175 
179  template< typename T1, typename T2, typename T3 >
180  struct UseDoublePrecisionKernel {
181  enum { value = BLAZE_BLAS_MODE &&
182  HasMutableDataAccess<T1>::value &&
183  HasConstDataAccess<T2>::value &&
184  HasConstDataAccess<T3>::value &&
185  !IsDiagonal<T3>::value &&
186  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187  IsDouble<typename T1::ElementType>::value &&
188  IsDouble<typename T2::ElementType>::value &&
189  IsDouble<typename T3::ElementType>::value };
190  };
192  //**********************************************************************************************
193 
194  //**********************************************************************************************
196 
200  template< typename T1, typename T2, typename T3 >
201  struct UseSinglePrecisionComplexKernel {
202  typedef complex<float> Type;
203  enum { value = BLAZE_BLAS_MODE &&
204  HasMutableDataAccess<T1>::value &&
205  HasConstDataAccess<T2>::value &&
206  HasConstDataAccess<T3>::value &&
207  !IsDiagonal<T3>::value &&
208  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209  IsSame<typename T1::ElementType,Type>::value &&
210  IsSame<typename T2::ElementType,Type>::value &&
211  IsSame<typename T3::ElementType,Type>::value };
212  };
214  //**********************************************************************************************
215 
216  //**********************************************************************************************
218 
222  template< typename T1, typename T2, typename T3 >
223  struct UseDoublePrecisionComplexKernel {
224  typedef complex<double> Type;
225  enum { value = BLAZE_BLAS_MODE &&
226  HasMutableDataAccess<T1>::value &&
227  HasConstDataAccess<T2>::value &&
228  HasConstDataAccess<T3>::value &&
229  !IsDiagonal<T3>::value &&
230  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
231  IsSame<typename T1::ElementType,Type>::value &&
232  IsSame<typename T2::ElementType,Type>::value &&
233  IsSame<typename T3::ElementType,Type>::value };
234  };
236  //**********************************************************************************************
237 
238  //**********************************************************************************************
240 
243  template< typename T1, typename T2, typename T3 >
244  struct UseDefaultKernel {
245  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
246  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
247  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
248  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
249  };
251  //**********************************************************************************************
252 
253  //**********************************************************************************************
255 
259  template< typename T1, typename T2, typename T3 >
260  struct UseVectorizedDefaultKernel {
261  enum { value = !IsDiagonal<T3>::value &&
262  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
263  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
264  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
265  IntrinsicTrait<typename T1::ElementType>::addition &&
266  IntrinsicTrait<typename T1::ElementType>::multiplication };
267  };
269  //**********************************************************************************************
270 
271  public:
272  //**Type definitions****************************************************************************
278  typedef const ElementType ReturnType;
279  typedef const ResultType CompositeType;
280 
282  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
283 
285  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
286 
289 
292  //**********************************************************************************************
293 
294  //**Compilation flags***************************************************************************
296  enum { vectorizable = !IsDiagonal<MT>::value &&
297  VT::vectorizable && MT::vectorizable &&
301 
303  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
304  !evaluateMatrix && MT::smpAssignable };
305  //**********************************************************************************************
306 
307  //**Constructor*********************************************************************************
313  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
314  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
315  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
316  {
317  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
318  }
319  //**********************************************************************************************
320 
321  //**Subscript operator**************************************************************************
327  inline ReturnType operator[]( size_t index ) const {
328  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
329 
330  if( ( IsStrictlyLower<MT>::value && index == mat_.columns()-1UL ) ||
331  ( IsStrictlyUpper<MT>::value && index == 0UL ) ||
332  mat_.rows() == 0UL )
333  return ElementType();
334 
336  return vec_[index] * mat_(index,index);
337 
338  const size_t ibegin( ( IsLower<MT>::value )
339  ?( IsStrictlyLower<MT>::value ? index+1UL : index )
340  :( 0UL ) );
341  const size_t iend( ( IsUpper<MT>::value )
342  ?( IsStrictlyUpper<MT>::value ? index : index+1UL )
343  :( mat_.rows() ) );
344  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
345 
346  const size_t inum( iend - ibegin );
347  const size_t ipos( ibegin + ( ( inum - 1UL ) & size_t(-2) ) + 1UL );
348 
349  ElementType res( vec_[ibegin] * mat_(ibegin,index) );
350 
351  for( size_t i=ibegin+1UL; i<ipos; i+=2UL ) {
352  res += vec_[i] * mat_(i,index) + vec_[i+1UL] * mat_(i+1UL,index);
353  }
354  if( ipos < iend ) {
355  res += vec_[ipos] * mat_(ipos,index);
356  }
357 
358  return res;
359  }
360  //**********************************************************************************************
361 
362  //**Size function*******************************************************************************
367  inline size_t size() const {
368  return mat_.columns();
369  }
370  //**********************************************************************************************
371 
372  //**Left operand access*************************************************************************
377  inline LeftOperand leftOperand() const {
378  return vec_;
379  }
380  //**********************************************************************************************
381 
382  //**Right operand access************************************************************************
387  inline RightOperand rightOperand() const {
388  return mat_;
389  }
390  //**********************************************************************************************
391 
392  //**********************************************************************************************
398  template< typename T >
399  inline bool canAlias( const T* alias ) const {
400  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
401  }
402  //**********************************************************************************************
403 
404  //**********************************************************************************************
410  template< typename T >
411  inline bool isAliased( const T* alias ) const {
412  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
413  }
414  //**********************************************************************************************
415 
416  //**********************************************************************************************
421  inline bool isAligned() const {
422  return vec_.isAligned() && mat_.isAligned();
423  }
424  //**********************************************************************************************
425 
426  //**********************************************************************************************
431  inline bool canSMPAssign() const {
432  return ( !BLAZE_BLAS_IS_PARALLEL ||
433  ( IsComputation<MT>::value && !evaluateMatrix ) ||
434  ( mat_.rows() * mat_.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
436  }
437  //**********************************************************************************************
438 
439  private:
440  //**Member variables****************************************************************************
441  LeftOperand vec_;
442  RightOperand mat_;
443  //**********************************************************************************************
444 
445  //**Assignment to dense vectors*****************************************************************
458  template< typename VT1 > // Type of the target dense vector
459  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
460  {
462 
463  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
464 
465  if( rhs.mat_.rows() == 0UL ) {
466  reset( ~lhs );
467  return;
468  }
469  else if( rhs.mat_.columns() == 0UL ) {
470  return;
471  }
472 
473  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
474  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
475 
476  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
477  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
478  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
479  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
480 
481  TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
482  }
484  //**********************************************************************************************
485 
486  //**Assignment to dense vectors (kernel selection)**********************************************
497  template< typename VT1 // Type of the left-hand side target vector
498  , typename VT2 // Type of the left-hand side vector operand
499  , typename MT1 > // Type of the right-hand side matrix operand
500  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
501  {
502  if( ( IsDiagonal<MT1>::value ) ||
503  ( IsComputation<MT>::value && !evaluateMatrix ) ||
504  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
505  selectSmallAssignKernel( y, x, A );
506  else
507  selectBlasAssignKernel( y, x, A );
508  }
510  //**********************************************************************************************
511 
512  //**Default assignment to dense vectors*********************************************************
// Default (scalar) kernel for the assignment y = x * A.
//
// y: target vector, x: vector operand, A: matrix operand.
// The product is built row by row of A: row 0 initializes y (for non-lower matrices), every
// further row i adds x[i] * A(i,j) to y[j]. The compile-time traits IsLower / IsUpper /
// IsStrictlyLower / IsStrictlyUpper / IsDiagonal restrict the visited column range per row.
526  template< typename VT1 // Type of the left-hand side target vector
527  , typename VT2 // Type of the left-hand side vector operand
528  , typename MT1 > // Type of the right-hand side matrix operand
529  static inline void selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
530  {
531  const size_t M( A.rows() );
532  const size_t N( A.columns() );
533 
// Strictly upper matrix: y[0] is not written by any of the loops below (they start at
// column 1 or later), so it is reset explicitly.
534  if( IsStrictlyUpper<MT1>::value ) {
535  reset( y[0] );
536  }
537 
// General and upper matrices: initialize y with the contribution of row 0.
538  if( !IsLower<MT1>::value )
539  {
540  const size_t jbegin( IsStrictlyUpper<MT1>::value ? 1UL : 0UL );
541  for( size_t j=jbegin; j<N; ++j ) {
542  y[j] = x[0UL] * A(0UL,j);
543  }
544  }
545 
// Remaining rows. The loop starts at row 0 only for lower, non-strictly-lower matrices
// (row 0 was not processed above for those); otherwise it starts at row 1.
546  for( size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
547  {
548  if( IsDiagonal<MT1>::value )
549  {
// Diagonal matrix: exactly one product per element, assigned directly.
550  y[i] = x[i] * A(i,i);
551  }
552  else
553  {
554  const size_t jbegin( ( IsUpper<MT1>::value )
555  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
556  :( 0UL ) );
557  const size_t jend( ( IsLower<MT1>::value )
558  ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
559  :( N ) );
560  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
561 
// jpos is jbegin plus the column count rounded down to an even number: the loop below
// handles two columns per iteration, a single leftover column is handled afterwards.
562  const size_t jnum( jend - jbegin );
563  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
564 
565  for( size_t j=jbegin; j<jpos; j+=2UL ) {
566  y[j ] += x[i] * A(i,j );
567  y[j+1UL] += x[i] * A(i,j+1UL);
568  }
569  if( jpos < jend ) {
570  y[jpos] += x[i] * A(i,jpos);
571  }
// Lower matrices: column jend is touched for the first time in this row, hence it is
// assigned (not accumulated).
572  if( IsLower<MT1>::value ) {
573  y[jend] = x[i] * A(i,jend);
574  }
575  }
576  }
577 
// Strictly lower matrix: the last element y[N-1] is not written by the loop above
// (jend is at most M-2 there), so it is reset explicitly.
578  if( IsStrictlyLower<MT1>::value ) {
579  reset( y[N-1UL] );
580  }
581  }
583  //**********************************************************************************************
584 
585  //**Default assignment to dense vectors (small matrices)****************************************
599  template< typename VT1 // Type of the left-hand side target vector
600  , typename VT2 // Type of the left-hand side vector operand
601  , typename MT1 > // Type of the right-hand side matrix operand
602  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
603  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
604  {
605  selectDefaultAssignKernel( y, x, A );
606  }
608  //**********************************************************************************************
609 
610  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized default kernel for the assignment y = x * A (small matrices).
//
// y: target vector, x: vector operand, A: matrix operand.
// The columns of A are processed in chunks of 8, 4, 3, 2 and finally 1 SIMD vector(s) of
// IT::size elements. For each chunk the products set(x[i]) * A.load(i,j...) are accumulated
// over the rows [ibegin,iend) and the accumulators are stored to y exactly once.
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed; this presumably
// relies on IntrinsicType's default constructor producing zeros -- confirm.
624  template< typename VT1 // Type of the left-hand side target vector
625  , typename VT2 // Type of the left-hand side vector operand
626  , typename MT1 > // Type of the right-hand side matrix operand
627  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
628  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
629  {
630  typedef IntrinsicTrait<ElementType> IT;
631 
632  const size_t M( A.rows() );
633  const size_t N( A.columns() );
634 
// Column index shared by all loops below; each loop continues where the previous stopped.
635  size_t j( 0UL );
636 
// Chunks of eight SIMD vectors
637  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
638  {
// Row range: lower matrices start at row j (j+1 if strictly lower); upper matrices stop
// at the end of the current column chunk (one row earlier if strictly upper).
639  const size_t ibegin( ( IsLower<MT1>::value )
640  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
641  :( 0UL ) );
642  const size_t iend( ( IsUpper<MT1>::value )
643  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
644  :( M ) );
645  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
646 
647  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
648 
649  for( size_t i=ibegin; i<iend; ++i ) {
650  const IntrinsicType x1( set( x[i] ) );
651  xmm1 = xmm1 + x1 * A.load(i,j );
652  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
653  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
654  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
655  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
656  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
657  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
658  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
659  }
660 
661  y.store( j , xmm1 );
662  y.store( j+IT::size , xmm2 );
663  y.store( j+IT::size*2UL, xmm3 );
664  y.store( j+IT::size*3UL, xmm4 );
665  y.store( j+IT::size*4UL, xmm5 );
666  y.store( j+IT::size*5UL, xmm6 );
667  y.store( j+IT::size*6UL, xmm7 );
668  y.store( j+IT::size*7UL, xmm8 );
669  }
670 
// Chunks of four SIMD vectors
671  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
672  {
673  const size_t ibegin( ( IsLower<MT1>::value )
674  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
675  :( 0UL ) );
676  const size_t iend( ( IsUpper<MT1>::value )
677  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
678  :( M ) );
679  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
680 
681  IntrinsicType xmm1, xmm2, xmm3, xmm4;
682 
683  for( size_t i=ibegin; i<iend; ++i ) {
684  const IntrinsicType x1( set( x[i] ) );
685  xmm1 = xmm1 + x1 * A.load(i,j );
686  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
687  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
688  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
689  }
690 
691  y.store( j , xmm1 );
692  y.store( j+IT::size , xmm2 );
693  y.store( j+IT::size*2UL, xmm3 );
694  y.store( j+IT::size*3UL, xmm4 );
695  }
696 
// Chunks of three SIMD vectors
697  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
698  {
699  const size_t ibegin( ( IsLower<MT1>::value )
700  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
701  :( 0UL ) );
702  const size_t iend( ( IsUpper<MT1>::value )
703  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
704  :( M ) );
705  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
706 
707  IntrinsicType xmm1, xmm2, xmm3;
708 
709  for( size_t i=ibegin; i<iend; ++i ) {
710  const IntrinsicType x1( set( x[i] ) );
711  xmm1 = xmm1 + x1 * A.load(i,j );
712  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
713  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
714  }
715 
716  y.store( j , xmm1 );
717  y.store( j+IT::size , xmm2 );
718  y.store( j+IT::size*2UL, xmm3 );
719  }
720 
// Chunks of two SIMD vectors
721  for( ; (j+IT::size) < N; j+=IT::size*2UL )
722  {
723  const size_t ibegin( ( IsLower<MT1>::value )
724  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
725  :( 0UL ) );
726  const size_t iend( ( IsUpper<MT1>::value )
727  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
728  :( M ) );
729  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
730 
731  IntrinsicType xmm1, xmm2;
732 
733  for( size_t i=ibegin; i<iend; ++i ) {
734  const IntrinsicType x1( set( x[i] ) );
735  xmm1 = xmm1 + x1 * A.load(i,j );
736  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
737  }
738 
739  y.store( j , xmm1 );
740  y.store( j+IT::size, xmm2 );
741  }
742 
// Final single SIMD vector.
// NOTE(review): the load/store below always cover IT::size elements starting at j, even if
// fewer than IT::size columns remain; presumably this relies on padded storage -- confirm.
743  if( j < N )
744  {
745  const size_t ibegin( ( IsLower<MT1>::value )
746  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
747  :( 0UL ) );
748  const size_t iend( ( IsUpper<MT1>::value )
749  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
750  :( M ) );
751  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
752 
753  IntrinsicType xmm1;
754 
755  for( size_t i=ibegin; i<iend; ++i ) {
756  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
757  }
758 
759  y.store( j, xmm1 );
760  }
761  }
763  //**********************************************************************************************
764 
765  //**Default assignment to dense vectors (large matrices)****************************************
779  template< typename VT1 // Type of the left-hand side target vector
780  , typename VT2 // Type of the left-hand side vector operand
781  , typename MT1 > // Type of the right-hand side matrix operand
782  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
783  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
784  {
785  selectDefaultAssignKernel( y, x, A );
786  }
788  //**********************************************************************************************
789 
790  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized default kernel for the assignment y = x * A (large matrices).
//
// y: target vector, x: vector operand, A: matrix operand.
// The iteration space is tiled: columns in blocks of jblock elements, rows in blocks of
// iblock rows. y is reset up front; every tile then adds its partial sums to y via
// y.load() + accumulator followed by y.store().
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed; this presumably
// relies on IntrinsicType's default constructor producing zeros -- confirm.
804  template< typename VT1 // Type of the left-hand side target vector
805  , typename VT2 // Type of the left-hand side vector operand
806  , typename MT1 > // Type of the right-hand side matrix operand
807  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
808  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
809  {
810  typedef IntrinsicTrait<ElementType> IT;
811 
812  const size_t M( A.rows() );
813  const size_t N( A.columns() );
814 
// Column block: number of elements that fit into 32768 bytes. Row block: 8 rows if all
// columns fit into a single column block, 4 rows otherwise.
815  const size_t jblock( 32768UL / sizeof( ElementType ) );
816  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
817 
818  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
819 
// All tiles accumulate into y, therefore it has to start from its reset state.
820  reset( y );
821 
822  for( size_t jj=0U; jj<N; jj+=jblock ) {
823  for( size_t ii=0UL; ii<M; ii+=iblock )
824  {
825  const size_t iend( min( ii+iblock, M ) );
826  const size_t jtmp( min( jj+jblock, N ) );
// Lower matrices: columns at or beyond the last row of the tile are not visited.
827  const size_t jend( ( IsLower<MT1>::value )
828  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
829  :( jtmp ) );
830 
// Upper matrices: start at the first row index of the tile (plus one if strictly upper),
// rounded down to a multiple of IT::size via the bit mask, but never before jj.
831  size_t j( ( IsUpper<MT1>::value )
832  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
833  :( jj ) );
834 
// Chunks of eight SIMD vectors
835  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
836  {
837  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
838 
839  for( size_t i=ii; i<iend; ++i ) {
840  const IntrinsicType x1( set( x[i] ) );
841  xmm1 = xmm1 + x1 * A.load(i,j );
842  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
843  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
844  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
845  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
846  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
847  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
848  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
849  }
850 
851  y.store( j , y.load(j ) + xmm1 );
852  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
853  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
854  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
855  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5 );
856  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6 );
857  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7 );
858  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8 );
859  }
860 
// Chunks of four SIMD vectors
861  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
862  {
863  IntrinsicType xmm1, xmm2, xmm3, xmm4;
864 
865  for( size_t i=ii; i<iend; ++i ) {
866  const IntrinsicType x1( set( x[i] ) );
867  xmm1 = xmm1 + x1 * A.load(i,j );
868  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
869  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
870  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
871  }
872 
873  y.store( j , y.load(j ) + xmm1 );
874  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
875  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
876  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
877  }
878 
// Chunks of three SIMD vectors
879  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
880  {
881  IntrinsicType xmm1, xmm2, xmm3;
882 
883  for( size_t i=ii; i<iend; ++i ) {
884  const IntrinsicType x1( set( x[i] ) );
885  xmm1 = xmm1 + x1 * A.load(i,j );
886  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
887  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
888  }
889 
890  y.store( j , y.load(j ) + xmm1 );
891  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
892  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
893  }
894 
// Chunks of two SIMD vectors
895  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
896  {
897  IntrinsicType xmm1, xmm2;
898 
899  for( size_t i=ii; i<iend; ++i ) {
900  const IntrinsicType x1( set( x[i] ) );
901  xmm1 = xmm1 + x1 * A.load(i,j );
902  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
903  }
904 
905  y.store( j , y.load(j ) + xmm1 );
906  y.store( j+IT::size, y.load(j+IT::size) + xmm2 );
907  }
908 
// Final single SIMD vector of the tile.
// NOTE(review): load/store cover IT::size elements starting at j even if fewer columns
// remain before jend; presumably this relies on padded storage -- confirm.
909  if( j < jend )
910  {
911  IntrinsicType xmm1;
912 
913  for( size_t i=ii; i<iend; ++i ) {
914  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
915  }
916 
917  y.store( j, y.load(j) + xmm1 );
918  }
919  }
920  }
921  }
923  //**********************************************************************************************
924 
925  //**BLAS-based assignment to dense vectors (default)********************************************
939  template< typename VT1 // Type of the left-hand side target vector
940  , typename VT2 // Type of the left-hand side vector operand
941  , typename MT1 > // Type of the right-hand side matrix operand
942  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
943  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
944  {
945  selectLargeAssignKernel( y, x, A );
946  }
948  //**********************************************************************************************
949 
950  //**BLAS-based assignment to dense vectors (single precision)***********************************
951 #if BLAZE_BLAS_MODE
952 
965  template< typename VT1 // Type of the left-hand side target vector
966  , typename VT2 // Type of the left-hand side vector operand
967  , typename MT1 > // Type of the right-hand side matrix operand
968  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
969  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
970  {
971  if( IsTriangular<MT1>::value ) {
972  assign( y, x );
973  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
974  }
975  else {
976  sgemv( y, x, A, 1.0F, 0.0F );
977  }
978  }
980 #endif
981  //**********************************************************************************************
982 
983  //**BLAS-based assignment to dense vectors (double precision)***********************************
984 #if BLAZE_BLAS_MODE
985 
998  template< typename VT1 // Type of the left-hand side target vector
999  , typename VT2 // Type of the left-hand side vector operand
1000  , typename MT1 > // Type of the right-hand side matrix operand
1001  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1002  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1003  {
1004  if( IsTriangular<MT1>::value ) {
1005  assign( y, x );
1006  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1007  }
1008  else {
1009  dgemv( y, x, A, 1.0, 0.0 );
1010  }
1011  }
1013 #endif
1014  //**********************************************************************************************
1015 
1016  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1017 #if BLAZE_BLAS_MODE
1018 
1031  template< typename VT1 // Type of the left-hand side target vector
1032  , typename VT2 // Type of the left-hand side vector operand
1033  , typename MT1 > // Type of the right-hand side matrix operand
1034  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1035  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1036  {
1037  if( IsTriangular<MT1>::value ) {
1038  assign( y, x );
1039  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1040  }
1041  else {
1042  cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1043  }
1044  }
1046 #endif
1047  //**********************************************************************************************
1048 
1049  //**BLAS-based assignment to dense vectors (double precision complex)***************************
1050 #if BLAZE_BLAS_MODE
1051 
1064  template< typename VT1 // Type of the left-hand side target vector
1065  , typename VT2 // Type of the left-hand side vector operand
1066  , typename MT1 > // Type of the right-hand side matrix operand
1067  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1068  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1069  {
1070  if( IsTriangular<MT1>::value ) {
1071  assign( y, x );
1072  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1073  }
1074  else {
1075  zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1076  }
1077  }
1079 #endif
1080  //**********************************************************************************************
1081 
1082  //**Assignment to sparse vectors****************************************************************
1095  template< typename VT1 > // Type of the target sparse vector
1096  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1097  {
1099 
1103 
1104  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1105 
1106  const ResultType tmp( serial( rhs ) );
1107  assign( ~lhs, tmp );
1108  }
1110  //**********************************************************************************************
1111 
1112  //**Addition assignment to dense vectors********************************************************
1125  template< typename VT1 > // Type of the target dense vector
1126  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1127  {
1129 
1130  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1131 
1132  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1133  return;
1134  }
1135 
1136  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1137  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1138 
1139  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1140  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1141  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1142  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1143 
1144  TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
1145  }
1147  //**********************************************************************************************
1148 
1149  //**Addition assignment to dense vectors (kernel selection)*************************************
1160  template< typename VT1 // Type of the left-hand side target vector
1161  , typename VT2 // Type of the left-hand side vector operand
1162  , typename MT1 > // Type of the right-hand side matrix operand
1163  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1164  {
1165  if( ( IsDiagonal<MT1>::value ) ||
1166  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1167  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1168  selectSmallAddAssignKernel( y, x, A );
1169  else
1170  selectBlasAddAssignKernel( y, x, A );
1171  }
1173  //**********************************************************************************************
1174 
1175  //**Default addition assignment to dense vectors************************************************
1189  template< typename VT1 // Type of the left-hand side target vector
1190  , typename VT2 // Type of the left-hand side vector operand
1191  , typename MT1 > // Type of the right-hand side matrix operand
1192  static inline void selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1193  {
1194  const size_t M( A.rows() );
1195  const size_t N( A.columns() );
1196 
1197  for( size_t i=0UL; i<M; ++i )
1198  {
1199  if( IsDiagonal<MT1>::value )
1200  {
1201  y[i] += x[i] * A(i,i);
1202  }
1203  else
1204  {
1205  const size_t jbegin( ( IsUpper<MT1>::value )
1206  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1207  :( 0UL ) );
1208  const size_t jend( ( IsLower<MT1>::value )
1209  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1210  :( N ) );
1211  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1212 
1213  const size_t jnum( jend - jbegin );
1214  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1215 
1216  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1217  y[j ] += x[i] * A(i,j );
1218  y[j+1UL] += x[i] * A(i,j+1UL);
1219  }
1220  if( jpos < jend ) {
1221  y[jpos] += x[i] * A(i,jpos);
1222  }
1223  }
1224  }
1225  }
1227  //**********************************************************************************************
1228 
1229  //**Default addition assignment to dense vectors (small matrices)*******************************
1243  template< typename VT1 // Type of the left-hand side target vector
1244  , typename VT2 // Type of the left-hand side vector operand
1245  , typename MT1 > // Type of the right-hand side matrix operand
1246  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1247  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1248  {
1249  selectDefaultAddAssignKernel( y, x, A );
1250  }
1252  //**********************************************************************************************
1253 
1254  //**Vectorized default addition assignment to dense vectors (small matrices)********************
1268  template< typename VT1 // Type of the left-hand side target vector
1269  , typename VT2 // Type of the left-hand side vector operand
1270  , typename MT1 > // Type of the right-hand side matrix operand
1271  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1272  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1273  {
1274  typedef IntrinsicTrait<ElementType> IT;
1275 
1276  const size_t M( A.rows() );
1277  const size_t N( A.columns() );
1278 
1279  size_t j( 0UL );
1280 
1281  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
1282  {
1283  const size_t ibegin( ( IsLower<MT1>::value )
1284  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1285  :( 0UL ) );
1286  const size_t iend( ( IsUpper<MT1>::value )
1287  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1288  :( M ) );
1289  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1290 
1291  IntrinsicType xmm1( y.load(j ) );
1292  IntrinsicType xmm2( y.load(j+IT::size ) );
1293  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1294  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1295  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1296  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1297  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1298  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1299 
1300  for( size_t i=ibegin; i<iend; ++i ) {
1301  const IntrinsicType x1( set( x[i] ) );
1302  xmm1 = xmm1 + x1 * A.load(i,j );
1303  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1304  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1305  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1306  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1307  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1308  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1309  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1310  }
1311 
1312  y.store( j , xmm1 );
1313  y.store( j+IT::size , xmm2 );
1314  y.store( j+IT::size*2UL, xmm3 );
1315  y.store( j+IT::size*3UL, xmm4 );
1316  y.store( j+IT::size*4UL, xmm5 );
1317  y.store( j+IT::size*5UL, xmm6 );
1318  y.store( j+IT::size*6UL, xmm7 );
1319  y.store( j+IT::size*7UL, xmm8 );
1320  }
1321 
1322  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
1323  {
1324  const size_t ibegin( ( IsLower<MT1>::value )
1325  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1326  :( 0UL ) );
1327  const size_t iend( ( IsUpper<MT1>::value )
1328  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1329  :( M ) );
1330  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1331 
1332  IntrinsicType xmm1( y.load(j ) );
1333  IntrinsicType xmm2( y.load(j+IT::size ) );
1334  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1335  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1336 
1337  for( size_t i=ibegin; i<iend; ++i ) {
1338  const IntrinsicType x1( set( x[i] ) );
1339  xmm1 = xmm1 + x1 * A.load(i,j );
1340  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1341  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1342  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1343  }
1344 
1345  y.store( j , xmm1 );
1346  y.store( j+IT::size , xmm2 );
1347  y.store( j+IT::size*2UL, xmm3 );
1348  y.store( j+IT::size*3UL, xmm4 );
1349  }
1350 
1351  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
1352  {
1353  const size_t ibegin( ( IsLower<MT1>::value )
1354  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1355  :( 0UL ) );
1356  const size_t iend( ( IsUpper<MT1>::value )
1357  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1358  :( M ) );
1359  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1360 
1361  IntrinsicType xmm1( y.load(j ) );
1362  IntrinsicType xmm2( y.load(j+IT::size ) );
1363  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1364 
1365  for( size_t i=ibegin; i<iend; ++i ) {
1366  const IntrinsicType x1( set( x[i] ) );
1367  xmm1 = xmm1 + x1 * A.load(i,j );
1368  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1369  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1370  }
1371 
1372  y.store( j , xmm1 );
1373  y.store( j+IT::size , xmm2 );
1374  y.store( j+IT::size*2UL, xmm3 );
1375  }
1376 
1377  for( ; (j+IT::size) < N; j+=IT::size*2UL )
1378  {
1379  const size_t ibegin( ( IsLower<MT1>::value )
1380  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1381  :( 0UL ) );
1382  const size_t iend( ( IsUpper<MT1>::value )
1383  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1384  :( M ) );
1385  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1386 
1387  IntrinsicType xmm1( y.load(j ) );
1388  IntrinsicType xmm2( y.load(j+IT::size) );
1389 
1390  for( size_t i=ibegin; i<iend; ++i ) {
1391  const IntrinsicType x1( set( x[i] ) );
1392  xmm1 = xmm1 + x1 * A.load(i,j );
1393  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1394  }
1395 
1396  y.store( j , xmm1 );
1397  y.store( j+IT::size, xmm2 );
1398  }
1399 
1400  if( j < N )
1401  {
1402  const size_t ibegin( ( IsLower<MT1>::value )
1403  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1404  :( 0UL ) );
1405  const size_t iend( ( IsUpper<MT1>::value )
1406  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1407  :( M ) );
1408  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1409 
1410  IntrinsicType xmm1( y.load(j) );
1411 
1412  for( size_t i=ibegin; i<iend; ++i ) {
1413  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1414  }
1415 
1416  y.store( j, xmm1 );
1417  }
1418  }
1420  //**********************************************************************************************
1421 
1422  //**Default addition assignment to dense vectors (large matrices)*******************************
1436  template< typename VT1 // Type of the left-hand side target vector
1437  , typename VT2 // Type of the left-hand side vector operand
1438  , typename MT1 > // Type of the right-hand side matrix operand
1439  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1440  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1441  {
1442  selectDefaultAddAssignKernel( y, x, A );
1443  }
1445  //**********************************************************************************************
1446 
1447  //**Vectorized default addition assignment to dense vectors (large matrices)********************
1461  template< typename VT1 // Type of the left-hand side target vector
1462  , typename VT2 // Type of the left-hand side vector operand
1463  , typename MT1 > // Type of the right-hand side matrix operand
1464  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1465  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1466  {
1467  typedef IntrinsicTrait<ElementType> IT;
1468 
1469  const size_t M( A.rows() );
1470  const size_t N( A.columns() );
1471 
1472  const size_t jblock( 32768UL / sizeof( ElementType ) );
1473  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1474 
1475  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
1476 
1477  for( size_t jj=0U; jj<N; jj+=jblock ) {
1478  for( size_t ii=0UL; ii<M; ii+=iblock )
1479  {
1480  const size_t iend( min( ii+iblock, M ) );
1481  const size_t jtmp( min( jj+jblock, N ) );
1482  const size_t jend( ( IsLower<MT1>::value )
1483  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
1484  :( jtmp ) );
1485 
1486  size_t j( ( IsUpper<MT1>::value )
1487  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
1488  :( jj ) );
1489 
1490  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
1491  {
1492  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1493 
1494  for( size_t i=ii; i<iend; ++i ) {
1495  const IntrinsicType x1( set( x[i] ) );
1496  xmm1 = xmm1 + x1 * A.load(i,j );
1497  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1498  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1499  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1500  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1501  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1502  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1503  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1504  }
1505 
1506  y.store( j , y.load(j ) + xmm1 );
1507  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1508  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1509  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
1510  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5 );
1511  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6 );
1512  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7 );
1513  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8 );
1514  }
1515 
1516  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
1517  {
1518  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1519 
1520  for( size_t i=ii; i<iend; ++i ) {
1521  const IntrinsicType x1( set( x[i] ) );
1522  xmm1 = xmm1 + x1 * A.load(i,j );
1523  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1524  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1525  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1526  }
1527 
1528  y.store( j , y.load(j ) + xmm1 );
1529  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1530  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1531  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
1532  }
1533 
1534  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
1535  {
1536  IntrinsicType xmm1, xmm2, xmm3;
1537 
1538  for( size_t i=ii; i<iend; ++i ) {
1539  const IntrinsicType x1( set( x[i] ) );
1540  xmm1 = xmm1 + x1 * A.load(i,j );
1541  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1542  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1543  }
1544 
1545  y.store( j , y.load(j ) + xmm1 );
1546  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1547  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1548  }
1549 
1550  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
1551  {
1552  IntrinsicType xmm1, xmm2;
1553 
1554  for( size_t i=ii; i<iend; ++i ) {
1555  const IntrinsicType x1( set( x[i] ) );
1556  xmm1 = xmm1 + x1 * A.load(i,j );
1557  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1558  }
1559 
1560  y.store( j , y.load(j ) + xmm1 );
1561  y.store( j+IT::size, y.load(j+IT::size) + xmm2 );
1562  }
1563 
1564  if( j < jend )
1565  {
1566  IntrinsicType xmm1;
1567 
1568  for( size_t i=ii; i<iend; ++i ) {
1569  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1570  }
1571 
1572  y.store( j, y.load(j) + xmm1 );
1573  }
1574  }
1575  }
1576  }
1578  //**********************************************************************************************
1579 
1580  //**BLAS-based addition assignment to dense vectors (default)***********************************
1594  template< typename VT1 // Type of the left-hand side target vector
1595  , typename VT2 // Type of the left-hand side vector operand
1596  , typename MT1 > // Type of the right-hand side matrix operand
1597  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1598  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1599  {
1600  selectLargeAddAssignKernel( y, x, A );
1601  }
1603  //**********************************************************************************************
1604 
1605  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1606 #if BLAZE_BLAS_MODE
1607 
1620  template< typename VT1 // Type of the left-hand side target vector
1621  , typename VT2 // Type of the left-hand side vector operand
1622  , typename MT1 > // Type of the right-hand side matrix operand
1623  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1624  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1625  {
1626  if( IsTriangular<MT1>::value ) {
1627  typename VT1::ResultType tmp( x );
1628  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1629  addAssign( y, tmp );
1630  }
1631  else {
1632  sgemv( y, x, A, 1.0F, 1.0F );
1633  }
1634  }
1636 #endif
1637  //**********************************************************************************************
1638 
1639  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1640 #if BLAZE_BLAS_MODE
1641 
1654  template< typename VT1 // Type of the left-hand side target vector
1655  , typename VT2 // Type of the left-hand side vector operand
1656  , typename MT1 > // Type of the right-hand side matrix operand
1657  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1658  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1659  {
1660  if( IsTriangular<MT1>::value ) {
1661  typename VT1::ResultType tmp( x );
1662  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1663  addAssign( y, tmp );
1664  }
1665  else {
1666  dgemv( y, x, A, 1.0, 1.0 );
1667  }
1668  }
1670 #endif
1671  //**********************************************************************************************
1672 
1673  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1674 #if BLAZE_BLAS_MODE
1675 
1688  template< typename VT1 // Type of the left-hand side target vector
1689  , typename VT2 // Type of the left-hand side vector operand
1690  , typename MT1 > // Type of the right-hand side matrix operand
1691  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1692  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1693  {
1694  if( IsTriangular<MT1>::value ) {
1695  typename VT1::ResultType tmp( x );
1696  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1697  addAssign( y, tmp );
1698  }
1699  else {
1700  cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1701  }
1702  }
1704 #endif
1705  //**********************************************************************************************
1706 
1707  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1708 #if BLAZE_BLAS_MODE
1709 
1722  template< typename VT1 // Type of the left-hand side target vector
1723  , typename VT2 // Type of the left-hand side vector operand
1724  , typename MT1 > // Type of the right-hand side matrix operand
1725  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1726  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1727  {
1728  if( IsTriangular<MT1>::value ) {
1729  typename VT1::ResultType tmp( x );
1730  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1731  addAssign( y, tmp );
1732  }
1733  else {
1734  zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1735  }
1736  }
1738 #endif
1739  //**********************************************************************************************
1740 
1741  //**Addition assignment to sparse vectors*******************************************************
1742  // No special implementation for the addition assignment to sparse vectors.
1743  //**********************************************************************************************
1744 
1745  //**Subtraction assignment to dense vectors*****************************************************
1758  template< typename VT1 > // Type of the target dense vector
1759  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1760  {
1762 
1763  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1764 
1765  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1766  return;
1767  }
1768 
1769  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1770  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1771 
1772  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1773  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1774  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1775  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1776 
1777  TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1778  }
1780  //**********************************************************************************************
1781 
1782  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1793  template< typename VT1 // Type of the left-hand side target vector
1794  , typename VT2 // Type of the left-hand side vector operand
1795  , typename MT1 > // Type of the right-hand side matrix operand
1796  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1797  {
1798  if( ( IsDiagonal<MT1>::value ) ||
1799  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1800  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1801  selectSmallSubAssignKernel( y, x, A );
1802  else
1803  selectBlasSubAssignKernel( y, x, A );
1804  }
1806  //**********************************************************************************************
1807 
1808  //**Default subtraction assignment to dense vectors*********************************************
1822  template< typename VT1 // Type of the left-hand side target vector
1823  , typename VT2 // Type of the left-hand side vector operand
1824  , typename MT1 > // Type of the right-hand side matrix operand
1825  static inline void selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1826  {
1827  const size_t M( A.rows() );
1828  const size_t N( A.columns() );
1829 
1830  for( size_t i=0UL; i<M; ++i )
1831  {
1832  if( IsDiagonal<MT1>::value )
1833  {
1834  y[i] -= x[i] * A(i,i);
1835  }
1836  else
1837  {
1838  const size_t jbegin( ( IsUpper<MT1>::value )
1839  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1840  :( 0UL ) );
1841  const size_t jend( ( IsLower<MT1>::value )
1842  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1843  :( N ) );
1844  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1845 
1846  const size_t jnum( jend - jbegin );
1847  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1848 
1849  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1850  y[j ] -= x[i] * A(i,j );
1851  y[j+1UL] -= x[i] * A(i,j+1UL);
1852  }
1853  if( jpos < jend ) {
1854  y[jpos] -= x[i] * A(i,jpos);
1855  }
1856  }
1857  }
1858  }
1860  //**********************************************************************************************
1861 
1862  //**Default subtraction assignment to dense vectors (small matrices)****************************
1876  template< typename VT1 // Type of the left-hand side target vector
1877  , typename VT2 // Type of the left-hand side vector operand
1878  , typename MT1 > // Type of the right-hand side matrix operand
1879  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1880  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1881  {
1882  selectDefaultSubAssignKernel( y, x, A );
1883  }
1885  //**********************************************************************************************
1886 
1887  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
1902  template< typename VT1 // Type of the left-hand side target vector
1903  , typename VT2 // Type of the left-hand side vector operand
1904  , typename MT1 > // Type of the right-hand side matrix operand
1905  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1906  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1907  {
1908  typedef IntrinsicTrait<ElementType> IT;
1909 
1910  const size_t M( A.rows() );
1911  const size_t N( A.columns() );
1912 
1913  size_t j( 0UL );
1914 
1915  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
1916  {
1917  const size_t ibegin( ( IsLower<MT1>::value )
1918  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1919  :( 0UL ) );
1920  const size_t iend( ( IsUpper<MT1>::value )
1921  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1922  :( M ) );
1923  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1924 
1925  IntrinsicType xmm1( y.load(j ) );
1926  IntrinsicType xmm2( y.load(j+IT::size ) );
1927  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1928  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1929  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1930  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1931  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1932  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1933 
1934  for( size_t i=ibegin; i<iend; ++i ) {
1935  const IntrinsicType x1( set( x[i] ) );
1936  xmm1 = xmm1 - x1 * A.load(i,j );
1937  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1938  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1939  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1940  xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1941  xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1942  xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1943  xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1944  }
1945 
1946  y.store( j , xmm1 );
1947  y.store( j+IT::size , xmm2 );
1948  y.store( j+IT::size*2UL, xmm3 );
1949  y.store( j+IT::size*3UL, xmm4 );
1950  y.store( j+IT::size*4UL, xmm5 );
1951  y.store( j+IT::size*5UL, xmm6 );
1952  y.store( j+IT::size*6UL, xmm7 );
1953  y.store( j+IT::size*7UL, xmm8 );
1954  }
1955 
1956  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
1957  {
1958  const size_t ibegin( ( IsLower<MT1>::value )
1959  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1960  :( 0UL ) );
1961  const size_t iend( ( IsUpper<MT1>::value )
1962  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1963  :( M ) );
1964  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1965 
1966  IntrinsicType xmm1( y.load(j ) );
1967  IntrinsicType xmm2( y.load(j+IT::size ) );
1968  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1969  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1970 
1971  for( size_t i=ibegin; i<iend; ++i ) {
1972  const IntrinsicType x1( set( x[i] ) );
1973  xmm1 = xmm1 - x1 * A.load(i,j );
1974  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1975  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1976  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1977  }
1978 
1979  y.store( j , xmm1 );
1980  y.store( j+IT::size , xmm2 );
1981  y.store( j+IT::size*2UL, xmm3 );
1982  y.store( j+IT::size*3UL, xmm4 );
1983  }
1984 
1985  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
1986  {
1987  const size_t ibegin( ( IsLower<MT1>::value )
1988  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1989  :( 0UL ) );
1990  const size_t iend( ( IsUpper<MT1>::value )
1991  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1992  :( M ) );
1993  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1994 
1995  IntrinsicType xmm1( y.load(j ) );
1996  IntrinsicType xmm2( y.load(j+IT::size ) );
1997  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1998 
1999  for( size_t i=ibegin; i<iend; ++i ) {
2000  const IntrinsicType x1( set( x[i] ) );
2001  xmm1 = xmm1 - x1 * A.load(i,j );
2002  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
2003  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
2004  }
2005 
2006  y.store( j , xmm1 );
2007  y.store( j+IT::size , xmm2 );
2008  y.store( j+IT::size*2UL, xmm3 );
2009  }
2010 
2011  for( ; (j+IT::size) < N; j+=IT::size*2UL )
2012  {
2013  const size_t ibegin( ( IsLower<MT1>::value )
2014  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2015  :( 0UL ) );
2016  const size_t iend( ( IsUpper<MT1>::value )
2017  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2018  :( M ) );
2019  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2020 
2021  IntrinsicType xmm1( y.load(j ) );
2022  IntrinsicType xmm2( y.load(j+IT::size) );
2023 
2024  for( size_t i=ibegin; i<iend; ++i ) {
2025  const IntrinsicType x1( set( x[i] ) );
2026  xmm1 = xmm1 - x1 * A.load(i,j );
2027  xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
2028  }
2029 
2030  y.store( j , xmm1 );
2031  y.store( j+IT::size, xmm2 );
2032  }
2033 
2034  if( j < N )
2035  {
2036  const size_t ibegin( ( IsLower<MT1>::value )
2037  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2038  :( 0UL ) );
2039  const size_t iend( ( IsUpper<MT1>::value )
2040  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2041  :( M ) );
2042  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2043 
2044  IntrinsicType xmm1( y.load(j) );
2045 
2046  for( size_t i=ibegin; i<iend; ++i ) {
2047  xmm1 = xmm1 - set( x[i] ) * A.load(i,j);
2048  }
2049 
2050  y.store( j, xmm1 );
2051  }
2052  }
2054  //**********************************************************************************************
2055 
2056  //**Default subtraction assignment to dense vectors (large matrices)****************************
2070  template< typename VT1 // Type of the left-hand side target vector
2071  , typename VT2 // Type of the left-hand side vector operand
2072  , typename MT1 > // Type of the right-hand side matrix operand
2073  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2074  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2075  {
2076  selectDefaultSubAssignKernel( y, x, A );
2077  }
2079  //**********************************************************************************************
2080 
2081  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
2096  template< typename VT1 // Type of the left-hand side target vector
2097  , typename VT2 // Type of the left-hand side vector operand
2098  , typename MT1 > // Type of the right-hand side matrix operand
2099  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2100  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2101  {
2102  typedef IntrinsicTrait<ElementType> IT;
2103 
2104  const size_t M( A.rows() );
2105  const size_t N( A.columns() );
2106 
2107  const size_t jblock( 32768UL / sizeof( ElementType ) );
2108  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
2109 
2110  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
2111 
2112  for( size_t jj=0U; jj<N; jj+=jblock ) {
2113  for( size_t ii=0UL; ii<M; ii+=iblock )
2114  {
2115  const size_t iend( min( ii+iblock, M ) );
2116  const size_t jtmp( min( jj+jblock, N ) );
2117  const size_t jend( ( IsLower<MT1>::value )
2118  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
2119  :( jtmp ) );
2120 
2121  size_t j( ( IsUpper<MT1>::value )
2122  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
2123  :( jj ) );
2124 
2125  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
2126  {
2127  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2128 
2129  for( size_t i=ii; i<iend; ++i ) {
2130  const IntrinsicType x1( set( x[i] ) );
2131  xmm1 = xmm1 + x1 * A.load(i,j );
2132  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2133  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2134  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2135  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2136  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2137  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2138  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2139  }
2140 
2141  y.store( j , y.load(j ) - xmm1 );
2142  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
2143  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
2144  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4 );
2145  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5 );
2146  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6 );
2147  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7 );
2148  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8 );
2149  }
2150 
2151  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
2152  {
2153  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2154 
2155  for( size_t i=ii; i<iend; ++i ) {
2156  const IntrinsicType x1( set( x[i] ) );
2157  xmm1 = xmm1 + x1 * A.load(i,j );
2158  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2159  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2160  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2161  }
2162 
2163  y.store( j , y.load(j ) - xmm1 );
2164  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
2165  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
2166  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4 );
2167  }
2168 
2169  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
2170  {
2171  IntrinsicType xmm1, xmm2, xmm3;
2172 
2173  for( size_t i=ii; i<iend; ++i ) {
2174  const IntrinsicType x1( set( x[i] ) );
2175  xmm1 = xmm1 + x1 * A.load(i,j );
2176  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2177  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2178  }
2179 
2180  y.store( j , y.load(j ) - xmm1 );
2181  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
2182  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
2183  }
2184 
2185  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
2186  {
2187  IntrinsicType xmm1, xmm2;
2188 
2189  for( size_t i=ii; i<iend; ++i ) {
2190  const IntrinsicType x1( set( x[i] ) );
2191  xmm1 = xmm1 + x1 * A.load(i,j );
2192  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2193  }
2194 
2195  y.store( j , y.load(j ) - xmm1 );
2196  y.store( j+IT::size, y.load(j+IT::size) - xmm2 );
2197  }
2198 
2199  if( j < jend )
2200  {
2201  IntrinsicType xmm1;
2202 
2203  for( size_t i=ii; i<iend; ++i ) {
2204  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2205  }
2206 
2207  y.store( j, y.load(j) - xmm1 );
2208  }
2209  }
2210  }
2211  }
2213  //**********************************************************************************************
2214 
2215  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Fallback overload of the BLAS-based subtraction assignment kernel (y -= x * A).
//
// Participates in overload resolution only when UseDefaultKernel<VT1,VT2,MT1> is
// satisfied (via EnableIf) and does nothing but forward to the large-matrix kernel.
// NOTE(review): UseDefaultKernel is declared outside this excerpt; presumably it is
// true when none of the BLAS precision kernels applies - confirm.
//
// y: target vector that is updated
// x: left-hand side vector operand
// A: right-hand side matrix operand
2229  template< typename VT1 // Type of the left-hand side target vector
2230  , typename VT2 // Type of the left-hand side vector operand
2231  , typename MT1 > // Type of the right-hand side matrix operand
2232  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
2233  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2234  {
2235  selectLargeSubAssignKernel( y, x, A );
2236  }
2238  //**********************************************************************************************
2239 
2240  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2241 #if BLAZE_BLAS_MODE
2242 
// Single-precision BLAS overload of the subtraction assignment kernel (y -= x * A).
//
// Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1> is satisfied; the whole
// overload is compiled only if BLAZE_BLAS_MODE is set (see the surrounding #if).
//
// y: target vector that is updated
// x: left-hand side vector operand
// A: right-hand side matrix operand
2255  template< typename VT1 // Type of the left-hand side target vector
2256  , typename VT2 // Type of the left-hand side vector operand
2257  , typename MT1 > // Type of the right-hand side matrix operand
2258  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
2259  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2260  {
// Triangular matrices: work on a copy of x so the operand itself is not modified,
// then subtract the result from y in a second step.
// NOTE(review): strmv presumably overwrites tmp with tmp * A (BLAS xTRMV); the
// wrapper lives in blaze/math/blas/Level2.h and is not visible here - confirm.
2261  if( IsTriangular<MT1>::value ) {
2262  typename VT1::ResultType tmp( x );
2263  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2264  subAssign( y, tmp );
2265  }
// General matrices: a single call with the factors -1 and 1.
// NOTE(review): presumably y = -1 * ( x * A ) + 1 * y (BLAS xGEMV alpha/beta) - confirm.
2266  else {
2267  sgemv( y, x, A, -1.0F, 1.0F );
2268  }
2269  }
2271 #endif
2272  //**********************************************************************************************
2273 
2274  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2275 #if BLAZE_BLAS_MODE
2276 
// Double-precision BLAS overload of the subtraction assignment kernel (y -= x * A).
//
// Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1> is satisfied; the whole
// overload is compiled only if BLAZE_BLAS_MODE is set (see the surrounding #if).
//
// y: target vector that is updated
// x: left-hand side vector operand
// A: right-hand side matrix operand
2289  template< typename VT1 // Type of the left-hand side target vector
2290  , typename VT2 // Type of the left-hand side vector operand
2291  , typename MT1 > // Type of the right-hand side matrix operand
2292  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
2293  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2294  {
// Triangular matrices: multiply a copy of x, then subtract the copy from y.
// NOTE(review): dtrmv presumably overwrites tmp with tmp * A (BLAS xTRMV) - confirm
// against blaze/math/blas/Level2.h.
2295  if( IsTriangular<MT1>::value ) {
2296  typename VT1::ResultType tmp( x );
2297  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2298  subAssign( y, tmp );
2299  }
// General matrices: a single call with the factors -1 and 1.
// NOTE(review): presumably y = -1 * ( x * A ) + 1 * y (BLAS xGEMV alpha/beta) - confirm.
2300  else {
2301  dgemv( y, x, A, -1.0, 1.0 );
2302  }
2303  }
2305 #endif
2306  //**********************************************************************************************
2307 
2308  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2309 #if BLAZE_BLAS_MODE
2310 
// Single-precision complex BLAS overload of the subtraction assignment kernel
// (y -= x * A).
//
// Enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is satisfied; the
// whole overload is compiled only if BLAZE_BLAS_MODE is set (see the surrounding #if).
//
// y: target vector that is updated
// x: left-hand side vector operand
// A: right-hand side matrix operand
2323  template< typename VT1 // Type of the left-hand side target vector
2324  , typename VT2 // Type of the left-hand side vector operand
2325  , typename MT1 > // Type of the right-hand side matrix operand
2326  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2327  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2328  {
// Triangular matrices: multiply a copy of x, then subtract the copy from y.
// NOTE(review): ctrmv presumably overwrites tmp with tmp * A (BLAS xTRMV) - confirm
// against blaze/math/blas/Level2.h.
2329  if( IsTriangular<MT1>::value ) {
2330  typename VT1::ResultType tmp( x );
2331  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2332  subAssign( y, tmp );
2333  }
// General matrices: a single call with the complex factors (-1,0) and (1,0).
// NOTE(review): presumably y = alpha * ( x * A ) + beta * y (BLAS xGEMV) - confirm.
2334  else {
2335  cgemv( y, x, A, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2336  }
2337  }
2339 #endif
2340  //**********************************************************************************************
2341 
2342  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2343 #if BLAZE_BLAS_MODE
2344 
// Double-precision complex BLAS overload of the subtraction assignment kernel
// (y -= x * A).
//
// Enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is satisfied; the
// whole overload is compiled only if BLAZE_BLAS_MODE is set (see the surrounding #if).
//
// y: target vector that is updated
// x: left-hand side vector operand
// A: right-hand side matrix operand
2357  template< typename VT1 // Type of the left-hand side target vector
2358  , typename VT2 // Type of the left-hand side vector operand
2359  , typename MT1 > // Type of the right-hand side matrix operand
2360  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2361  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2362  {
// Triangular matrices: multiply a copy of x, then subtract the copy from y.
// NOTE(review): ztrmv presumably overwrites tmp with tmp * A (BLAS xTRMV) - confirm
// against blaze/math/blas/Level2.h.
2363  if( IsTriangular<MT1>::value ) {
2364  typename VT1::ResultType tmp( x );
2365  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2366  subAssign( y, tmp );
2367  }
// General matrices: a single call with the complex factors (-1,0) and (1,0).
// NOTE(review): presumably y = alpha * ( x * A ) + beta * y (BLAS xGEMV) - confirm.
2368  else {
2369  zgemv( y, x, A, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2370  }
2371  }
2373 #endif
2374  //**********************************************************************************************
2375 
2376  //**Subtraction assignment to sparse vectors****************************************************
2377  // No special implementation for the subtraction assignment to sparse vectors.
2378  //**********************************************************************************************
2379 
2380  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a transpose dense vector / dense matrix product to a
// transpose dense vector (lhs *= rhs).
//
// There is no dedicated kernel: the product is first evaluated serially into a
// temporary of type ResultType, which is then multiplication-assigned to the target.
//
// lhs: target dense vector
// rhs: multiplication expression to be evaluated
2393  template< typename VT1 > // Type of the target dense vector
2394  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2395  {
2397
2401
2402  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2403
// serial( rhs ) requests a non-parallel evaluation of the expression into tmp.
2404  const ResultType tmp( serial( rhs ) );
2405  multAssign( ~lhs, tmp );
2406  }
2408  //**********************************************************************************************
2409 
2410  //**Multiplication assignment to sparse vectors*************************************************
2411  // No special implementation for the multiplication assignment to sparse vectors.
2412  //**********************************************************************************************
2413 
2414  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of a transpose dense vector / dense matrix product to a transpose
// dense vector (lhs = rhs).
//
// Only available when UseSMPAssign<VT1> is satisfied (via EnableIf). Both operands
// are bound to the types LT and RT and the product of the two is handed on to
// smpAssign again.
// NOTE(review): LT / RT are declared outside this excerpt; presumably they are the
// evaluated operand types, so that x * A no longer needs intermediate evaluation.
//
// lhs: target dense vector
// rhs: multiplication expression to be assigned
2429  template< typename VT1 > // Type of the target dense vector
2430  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2431  smpAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2432  {
2434
2435  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2436
// A matrix without rows contributes nothing: the result is all zeros, so the target
// is reset. A matrix without columns means there is no element to assign at all.
2437  if( rhs.mat_.rows() == 0UL ) {
2438  reset( ~lhs );
2439  return;
2440  }
2441  else if( rhs.mat_.columns() == 0UL ) {
2442  return;
2443  }
2444
2445  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2446  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2447
2448  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2449  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2450  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2451  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2452
2453  smpAssign( ~lhs, x * A );
2454  }
2456  //**********************************************************************************************
2457 
2458  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of a transpose dense vector / dense matrix product to a transpose
// sparse vector (lhs = rhs).
//
// Only available when UseSMPAssign<VT1> is satisfied (via EnableIf). The expression
// is first evaluated into a temporary of type ResultType, which is then assigned to
// the sparse target.
//
// lhs: target sparse vector
// rhs: multiplication expression to be assigned
2473  template< typename VT1 > // Type of the target sparse vector
2474  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2475  smpAssign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2476  {
2478
2482
2483  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2484
2485  const ResultType tmp( rhs );
2486  smpAssign( ~lhs, tmp );
2487  }
2489  //**********************************************************************************************
2490 
2491  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of a transpose dense vector / dense matrix product to a
// transpose dense vector (lhs += rhs).
//
// Only available when UseSMPAssign<VT1> is satisfied (via EnableIf). Both operands
// are bound to the types LT and RT and the product of the two is handed on to
// smpAddAssign again.
//
// lhs: target dense vector
// rhs: multiplication expression to be added
2506  template< typename VT1 > // Type of the target dense vector
2507  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2508  smpAddAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2509  {
2511
2512  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2513
// An empty matrix (no rows or no columns) adds nothing, so the target stays untouched.
2514  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2515  return;
2516  }
2517
2518  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2519  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2520
2521  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2522  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2523  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2524  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2525
2526  smpAddAssign( ~lhs, x * A );
2527  }
2529  //**********************************************************************************************
2530 
2531  //**SMP addition assignment to sparse vectors***************************************************
2532  // No special implementation for the SMP addition assignment to sparse vectors.
2533  //**********************************************************************************************
2534 
2535  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of a transpose dense vector / dense matrix product to a
// transpose dense vector (lhs -= rhs).
//
// Only available when UseSMPAssign<VT1> is satisfied (via EnableIf). Both operands
// are bound to the types LT and RT and the product of the two is handed on to
// smpSubAssign again.
//
// lhs: target dense vector
// rhs: multiplication expression to be subtracted
2550  template< typename VT1 > // Type of the target dense vector
2551  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2552  smpSubAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2553  {
2555
2556  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2557
// An empty matrix (no rows or no columns) subtracts nothing, so the target stays untouched.
2558  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2559  return;
2560  }
2561
2562  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2563  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2564
2565  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2566  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2567  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2568  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2569
2570  smpSubAssign( ~lhs, x * A );
2571  }
2573  //**********************************************************************************************
2574 
2575  //**SMP subtraction assignment to sparse vectors************************************************
2576  // No special implementation for the SMP subtraction assignment to sparse vectors.
2577  //**********************************************************************************************
2578 
2579  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of a transpose dense vector / dense matrix product
// to a transpose dense vector (lhs *= rhs).
//
// Only available when UseSMPAssign<VT1> is satisfied (via EnableIf). The expression
// is first evaluated into a temporary of type ResultType, which is then
// multiplication-assigned to the target.
//
// lhs: target dense vector
// rhs: multiplication expression to be evaluated
2594  template< typename VT1 > // Type of the target dense vector
2595  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2596  smpMultAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2597  {
2599
2603
2604  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2605
2606  const ResultType tmp( rhs );
2607  smpMultAssign( ~lhs, tmp );
2608  }
2610  //**********************************************************************************************
2611 
2612  //**SMP multiplication assignment to sparse vectors*********************************************
2613  // No special implementation for the SMP multiplication assignment to sparse vectors.
2614  //**********************************************************************************************
2615 
2616  //**Compile time checks*************************************************************************
2624  //**********************************************************************************************
2625 };
2626 //*************************************************************************************************
2627 
2628 
2629 
2630 
2631 //=================================================================================================
2632 //
2633 // DVECSCALARMULTEXPR SPECIALIZATION
2634 //
2635 //=================================================================================================
2636 
2637 //*************************************************************************************************
2645 template< typename VT // Type of the left-hand side dense vector
2646  , typename MT // Type of the right-hand side dense matrix
2647  , typename ST > // Type of the side scalar value
2648 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
2649  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
2650  , private VecScalarMultExpr
2651  , private Computation
2652 {
2653  private:
2654  //**Type definitions****************************************************************************
// Short-hand aliases used throughout this specialization.
// VMM: type of the wrapped vector/matrix multiplication expression.
2655  typedef TDVecDMatMultExpr<VT,MT> VMM;
// RES: result type of the wrapped multiplication expression.
2656  typedef typename VMM::ResultType RES;
// VRT / MRT: result types of the vector and the matrix operand.
2657  typedef typename VT::ResultType VRT;
2658  typedef typename MT::ResultType MRT;
// VET / MET: element types of the vector and the matrix result types.
2659  typedef typename VRT::ElementType VET;
2660  typedef typename MRT::ElementType MET;
// VCT / MCT: composite types of the vector and the matrix operand.
2661  typedef typename VT::CompositeType VCT;
2662  typedef typename MT::CompositeType MCT;
2663  //**********************************************************************************************
2664 
2665  //**********************************************************************************************
// Compile time flag: set if the vector operand is itself a computation or requires
// an intermediate evaluation. It selects the operand type LT and feeds UseSMPAssign
// and smpAssignable.
2667  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2668  //**********************************************************************************************
2669 
2670  //**********************************************************************************************
// Compile time flag: set if the matrix operand requires an intermediate evaluation,
// or if it is a computation whose element type equals the vector element type and
// is BLAS compatible. It selects the operand type RT and feeds UseSMPAssign,
// smpAssignable and the kernel selection.
2672  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2673  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2674  //**********************************************************************************************
2675 
2676  //**********************************************************************************************
2678 
// Helper trait for the SFINAE selection of the SMP assignment functions.
// value is set whenever at least one of the two operands has to be evaluated; the
// template argument T1 does not influence the result.
2681  template< typename T1 >
2682  struct UseSMPAssign {
2683  enum { value = ( evaluateVector || evaluateMatrix ) };
2684  };
2685  //**********************************************************************************************
2686 
2687  //**********************************************************************************************
2689 
// Helper trait selecting the single-precision BLAS kernel.
//
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type. value is set only if BLAS mode is active, the target offers
// mutable and both operands offer constant data access, the matrix is not diagonal,
// all three types are vectorizable, all three element types are float, and the
// scalar type is not complex.
2692  template< typename T1, typename T2, typename T3, typename T4 >
2693  struct UseSinglePrecisionKernel {
2694  enum { value = BLAZE_BLAS_MODE &&
2695  HasMutableDataAccess<T1>::value &&
2696  HasConstDataAccess<T2>::value &&
2697  HasConstDataAccess<T3>::value &&
2698  !IsDiagonal<T3>::value &&
2699  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2700  IsFloat<typename T1::ElementType>::value &&
2701  IsFloat<typename T2::ElementType>::value &&
2702  IsFloat<typename T3::ElementType>::value &&
2703  !IsComplex<T4>::value };
2704  };
2705  //**********************************************************************************************
2706 
2707  //**********************************************************************************************
2709 
// Helper trait selecting the double-precision BLAS kernel.
//
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type. value is set only if BLAS mode is active, the target offers
// mutable and both operands offer constant data access, the matrix is not diagonal,
// all three types are vectorizable, all three element types are double, and the
// scalar type is not complex.
2712  template< typename T1, typename T2, typename T3, typename T4 >
2713  struct UseDoublePrecisionKernel {
2714  enum { value = BLAZE_BLAS_MODE &&
2715  HasMutableDataAccess<T1>::value &&
2716  HasConstDataAccess<T2>::value &&
2717  HasConstDataAccess<T3>::value &&
2718  !IsDiagonal<T3>::value &&
2719  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2720  IsDouble<typename T1::ElementType>::value &&
2721  IsDouble<typename T2::ElementType>::value &&
2722  IsDouble<typename T3::ElementType>::value &&
2723  !IsComplex<T4>::value };
2724  };
2725  //**********************************************************************************************
2726 
2727  //**********************************************************************************************
2729 
// Helper trait selecting the single-precision complex BLAS kernel.
//
// T1: target vector type, T2: vector operand type, T3: matrix operand type.
// value is set only if BLAS mode is active, the target offers mutable and both
// operands offer constant data access, the matrix is not diagonal, all three types
// are vectorizable, and all three element types are exactly complex<float>.
// Unlike the real-valued kernels, the scalar type is not part of the condition.
2732  template< typename T1, typename T2, typename T3 >
2733  struct UseSinglePrecisionComplexKernel {
2734  typedef complex<float> Type;
2735  enum { value = BLAZE_BLAS_MODE &&
2736  HasMutableDataAccess<T1>::value &&
2737  HasConstDataAccess<T2>::value &&
2738  HasConstDataAccess<T3>::value &&
2739  !IsDiagonal<T3>::value &&
2740  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2741  IsSame<typename T1::ElementType,Type>::value &&
2742  IsSame<typename T2::ElementType,Type>::value &&
2743  IsSame<typename T3::ElementType,Type>::value };
2744  };
2745  //**********************************************************************************************
2746 
2747  //**********************************************************************************************
2749 
// Helper trait selecting the double-precision complex BLAS kernel.
//
// T1: target vector type, T2: vector operand type, T3: matrix operand type.
// value is set only if BLAS mode is active, the target offers mutable and both
// operands offer constant data access, the matrix is not diagonal, all three types
// are vectorizable, and all three element types are exactly complex<double>.
// Unlike the real-valued kernels, the scalar type is not part of the condition.
2752  template< typename T1, typename T2, typename T3 >
2753  struct UseDoublePrecisionComplexKernel {
2754  typedef complex<double> Type;
2755  enum { value = BLAZE_BLAS_MODE &&
2756  HasMutableDataAccess<T1>::value &&
2757  HasConstDataAccess<T2>::value &&
2758  HasConstDataAccess<T3>::value &&
2759  !IsDiagonal<T3>::value &&
2760  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2761  IsSame<typename T1::ElementType,Type>::value &&
2762  IsSame<typename T2::ElementType,Type>::value &&
2763  IsSame<typename T3::ElementType,Type>::value };
2764  };
2765  //**********************************************************************************************
2766 
2767  //**********************************************************************************************
2769 
// Helper trait selecting the default (non-BLAS) kernel.
//
// value is set if BLAS mode is switched off or if none of the four BLAS kernel
// traits (single, double, single complex, double complex) applies to the given
// combination of target (T1), vector (T2), matrix (T3) and scalar (T4) type.
2771  template< typename T1, typename T2, typename T3, typename T4 >
2772  struct UseDefaultKernel {
2773  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2774  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2775  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2776  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2777  };
2778  //**********************************************************************************************
2779 
2780  //**********************************************************************************************
2782 
// Helper trait selecting the vectorized variant of the default kernels.
//
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type. value is set only if the matrix is not diagonal, all three types
// are vectorizable, the element types of target, vector, matrix and the scalar type
// are all identical, and IntrinsicTrait reports addition and multiplication support
// for that element type.
2785  template< typename T1, typename T2, typename T3, typename T4 >
2786  struct UseVectorizedDefaultKernel {
2787  enum { value = !IsDiagonal<T3>::value &&
2788  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2789  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2790  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2791  IsSame<typename T1::ElementType,T4>::value &&
2792  IntrinsicTrait<typename T1::ElementType>::addition &&
2793  IntrinsicTrait<typename T1::ElementType>::multiplication };
2794  };
2795  //**********************************************************************************************
2796 
2797  public:
2798  //**Type definitions****************************************************************************
// Public type interface of the scaled vector/matrix multiplication expression.
// This: type of this expression specialization.
2799  typedef DVecScalarMultExpr<VMM,ST,true> This;
// ResultType: type obtained from MultTrait for the product result and the scalar.
2800  typedef typename MultTrait<RES,ST>::Type ResultType;
// TransposeType / ElementType: taken from ResultType.
2801  typedef typename ResultType::TransposeType TransposeType;
2802  typedef typename ResultType::ElementType ElementType;
// IntrinsicType: intrinsic (SIMD) type associated with ElementType via IntrinsicTrait.
2803  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// ReturnType: element access returns a constant element by value.
2804  typedef const ElementType ReturnType;
// CompositeType: the expression is represented by its evaluated result type when
// used as an operand.
2805  typedef const ResultType CompositeType;
2806
// LeftOperand: the wrapped vector/matrix multiplication expression.
2808  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
2809
// RightOperand: the scalar factor.
2811  typedef ST RightOperand;
2812
// LT: type the vector operand is bound to inside the kernels, chosen by evaluateVector.
// NOTE(review): SelectType presumably yields the first type (const VRT) when the
// flag is set and the second (VCT) otherwise - confirm in blaze/util/SelectType.h.
2814  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
2815
// RT: type the matrix operand is bound to inside the kernels, chosen by evaluateMatrix.
2817  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
2818  //**********************************************************************************************
2819 
2820  //**Compilation flags***************************************************************************
// vectorizable: set if the matrix is not diagonal, both operands are vectorizable,
// vector element type, matrix element type and scalar type are identical, and
// IntrinsicTrait reports addition and multiplication support for the element type.
2822  enum { vectorizable = !IsDiagonal<MT>::value &&
2823  VT::vectorizable && MT::vectorizable &&
2824  IsSame<VET,MET>::value &&
2825  IsSame<VET,ST>::value &&
2826  IntrinsicTrait<VET>::addition &&
2827  IntrinsicTrait<VET>::multiplication };
2828
// smpAssignable: set only if neither operand needs an intermediate evaluation and
// both operand types are themselves SMP-assignable.
2830  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2831  !evaluateMatrix && MT::smpAssignable };
2832  //**********************************************************************************************
2833 
2834  //**Constructor*********************************************************************************
// Constructor: stores the vector/matrix multiplication expression and the scalar.
//
// vector: left-hand side vector/matrix multiplication expression
// scalar: right-hand side scalar factor
2840  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
2841  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2842  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2843  {}
2844  //**********************************************************************************************
2845 
2846  //**Subscript operator**************************************************************************
// Subscript operator: returns element `index` of the wrapped product multiplied by
// the scalar. The index is only checked by an internal assertion against the size
// of the wrapped expression.
//
// index: access index, expected to be smaller than size()
2852  inline ReturnType operator[]( size_t index ) const {
2853  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2854  return vector_[index] * scalar_;
2855  }
2856  //**********************************************************************************************
2857 
2858  //**Size function*******************************************************************************
// Returns the size of the expression, i.e. the size of the wrapped product.
2863  inline size_t size() const {
2864  return vector_.size();
2865  }
2866  //**********************************************************************************************
2867 
2868  //**Left operand access*************************************************************************
// Returns the left-hand side operand: the wrapped vector/matrix multiplication
// expression (returned by value as LeftOperand).
2873  inline LeftOperand leftOperand() const {
2874  return vector_;
2875  }
2876  //**********************************************************************************************
2877 
2878  //**Right operand access************************************************************************
// Returns the right-hand side operand: the scalar factor.
2883  inline RightOperand rightOperand() const {
2884  return scalar_;
2885  }
2886  //**********************************************************************************************
2887 
2888  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
// The query is answered entirely by the wrapped multiplication expression; the
// scalar is not consulted.
//
// alias: address to be checked
2894  template< typename T >
2895  inline bool canAlias( const T* alias ) const {
2896  return vector_.canAlias( alias );
2897  }
2898  //**********************************************************************************************
2899 
2900  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
// The query is answered entirely by the wrapped multiplication expression; the
// scalar is not consulted.
//
// alias: address to be checked
2906  template< typename T >
2907  inline bool isAliased( const T* alias ) const {
2908  return vector_.isAliased( alias );
2909  }
2910  //**********************************************************************************************
2911 
2912  //**********************************************************************************************
// Returns the alignment state reported by the wrapped multiplication expression.
2917  inline bool isAligned() const {
2918  return vector_.isAligned();
2919  }
2920  //**********************************************************************************************
2921 
2922  //**********************************************************************************************
// Returns whether the expression can be used in SMP (parallel) assignments.
//
// The visible part of the condition holds if the BLAS library is not itself
// parallel, or the matrix operand is a computation that is not evaluated, or the
// matrix has fewer than TDVECDMATMULT_THRESHOLD elements - i.e. the cases in which
// selectAssignKernel would not hand the work to a (parallel) BLAS kernel.
2927  inline bool canSMPAssign() const {
2928  typename VMM::RightOperand A( vector_.rightOperand() );
2929  return ( !BLAZE_BLAS_IS_PARALLEL ||
2930  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2931  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
// NOTE(review): the return expression ends on a dangling '&&' and the listing skips
// number 2932 - the right-hand operand of the conjunction (and the terminating ';')
// appears to have been lost when this listing was produced. Restore it from the
// upstream header; as shown here the function does not compile.
2933  }
2934  //**********************************************************************************************
2935 
2936  private:
2937  //**Member variables****************************************************************************
// vector_: the wrapped vector/matrix multiplication expression (left-hand side operand).
2938  LeftOperand vector_;
// scalar_: the scalar factor (right-hand side operand).
2939  RightOperand scalar_;
2940  //**********************************************************************************************
2941 
2942  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled transpose dense vector / dense matrix product to a
// transpose dense vector (lhs = ( x * A ) * scalar).
//
// The operands of the wrapped product are extracted, trivial sizes are handled up
// front, both operands are evaluated serially into the types LT and RT, and the
// actual work is delegated to selectAssignKernel together with the scalar.
//
// lhs: target dense vector
// rhs: scaled multiplication expression to be assigned
2954  template< typename VT1 > // Type of the target dense vector
2955  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2956  {
2958
2959  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2960
2961  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2962  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2963
// A matrix without rows contributes nothing: the result is all zeros, so the target
// is reset. A matrix without columns means there is no element to assign at all.
2964  if( right.rows() == 0UL ) {
2965  reset( ~lhs );
2966  return;
2967  }
2968  else if( right.columns() == 0UL ) {
2969  return;
2970  }
2971
2972  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
2973  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2974
2975  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2976  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2977  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2978  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2979
2980  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
2981  }
2982  //**********************************************************************************************
2983 
2984  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment y = ( x * A ) * scalar.
//
// The small-matrix kernel is used if the matrix is diagonal, or if the original
// matrix operand MT is a computation that was not evaluated, or if the matrix has
// fewer than TDVECDMATMULT_THRESHOLD elements (rows * columns). In every other case
// the BLAS-based kernel is used.
//
// y: target vector, x: vector operand, A: matrix operand, scalar: scaling factor
2995  template< typename VT1 // Type of the left-hand side target vector
2996  , typename VT2 // Type of the left-hand side vector operand
2997  , typename MT1 // Type of the right-hand side matrix operand
2998  , typename ST2 > // Type of the scalar value
2999  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3000  {
3001  if( ( IsDiagonal<MT1>::value ) ||
3002  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3003  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3004  selectSmallAssignKernel( y, x, A, scalar );
3005  else
3006  selectBlasAssignKernel( y, x, A, scalar );
3007  }
3008  //**********************************************************************************************
3009 
3010  //**Default assignment to dense vectors*********************************************************
// Default (scalar, non-vectorized) kernel for the assignment y = ( x * A ) * scalar.
//
// The product is accumulated row by row of A: row i contributes x[i] * A(i,j) to
// every y[j] in the column range that can hold non-zero elements for the given
// matrix structure (general, lower, upper, strictly lower/upper, diagonal). The
// scaling by `scalar` is applied in a final pass, except for diagonal matrices,
// where it is folded into the single product per element.
//
// y: target vector, x: vector operand, A: matrix operand, scalar: scaling factor
3024  template< typename VT1 // Type of the left-hand side target vector
3025  , typename VT2 // Type of the left-hand side vector operand
3026  , typename MT1 // Type of the right-hand side matrix operand
3027  , typename ST2 > // Type of the scalar value
3028  static inline void selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3029  {
3030  const size_t M( A.rows() );
3031  const size_t N( A.columns() );
3032
// Column 0 of a strictly upper matrix holds no non-zero element, so y[0] receives
// no contribution from any row and is cleared explicitly.
3033  if( IsStrictlyUpper<MT1>::value ) {
3034  reset( y[0] );
3035  }
3036
// For non-lower matrices row 0 initializes y by plain assignment; all later rows
// accumulate on top of it.
3037  if( !IsLower<MT1>::value )
3038  {
3039  for( size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<N; ++j ) {
3040  y[j] = x[0UL] * A(0UL,j);
3041  }
3042  }
3043
// Remaining rows. The loop starts at row 0 only for lower (but not strictly lower)
// matrices, for which row 0 has not been processed above.
3044  for( size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
3045  {
3046  if( IsDiagonal<MT1>::value )
3047  {
// Diagonal matrix: one product per element, scaled immediately.
3048  y[i] = x[i] * A(i,i) * scalar;
3049  }
3050  else
3051  {
// [jbegin,jend) is the range of already initialized elements of y that row i
// accumulates into.
3052  const size_t jbegin( ( IsUpper<MT1>::value )
3053  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3054  :( 0UL ) );
3055  const size_t jend( ( IsLower<MT1>::value )
3056  ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
3057  :( N ) );
3058  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3059
// jpos marks the end of the part of the range that can be processed in pairs
// (the element count rounded down to a multiple of two).
3060  const size_t jnum( jend - jbegin );
3061  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3062
3063  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3064  y[j ] += x[i] * A(i,j );
3065  y[j+1UL] += x[i] * A(i,j+1UL);
3066  }
// Left-over element if the range length is odd.
3067  if( jpos < jend ) {
3068  y[jpos] += x[i] * A(i,jpos);
3069  }
// Lower matrices: row i is the first row that touches element jend, so that element
// is initialized by assignment instead of accumulation.
3070  if( IsLower<MT1>::value ) {
3071  y[jend] = x[i] * A(i,jend);
3072  }
3073  }
3074  }
3075
// The last column of a strictly lower matrix holds no non-zero element, so the last
// element of y is cleared explicitly.
3076  if( IsStrictlyLower<MT1>::value ) {
3077  reset( y[N-1UL] );
3078  }
3079
// Final scaling pass (diagonal matrices were scaled above). Elements that were
// explicitly reset for strictly triangular matrices are skipped.
3080  if( !IsDiagonal<MT1>::value )
3081  {
3082  const size_t iend( IsStrictlyLower<MT1>::value ? N-1UL : N );
3083  for( size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<iend; ++j ) {
3084  y[j] *= scalar;
3085  }
3086  }
3087  }
3088  //**********************************************************************************************
3089 
3090  //**Default assignment to dense vectors (small matrices)****************************************
// Small-matrix kernel for the assignment y = ( x * A ) * scalar, non-vectorized
// overload.
//
// Participates in overload resolution only when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is NOT satisfied (via DisableIf) and
// simply forwards to selectDefaultAssignKernel.
//
// y: target vector, x: vector operand, A: matrix operand, scalar: scaling factor
3104  template< typename VT1 // Type of the left-hand side target vector
3105  , typename VT2 // Type of the left-hand side vector operand
3106  , typename MT1 // Type of the right-hand side matrix operand
3107  , typename ST2 > // Type of the scalar value
3108  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3109  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3110  {
3111  selectDefaultAssignKernel( y, x, A, scalar );
3112  }
3113  //**********************************************************************************************
3114 
3115  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized small-matrix kernel for the scaled assignment y = ( x * A ) * scalar.
//
// The columns of A are processed in chunks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For every chunk the products x[i] * A(i,j...) are accumulated over the rows
// [ibegin,iend), multiplied by 'factor' and written to y via y.store(), i.e. the previous content
// of y is overwritten. For lower/upper (strictly) triangular matrix types the row range is
// narrowed at compile time via IsLower/IsStrictlyLower/IsUpper/IsStrictlyUpper.
// This overload is enabled only if UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector of the assignment
// x      : left-hand side vector operand
// A      : right-hand side matrix operand
// scalar : scaling factor applied to the product
//
// NOTE(review): the accumulators are only default-constructed; this assumes that a
// default-constructed IntrinsicType represents zero -- TODO confirm.
3129  template< typename VT1 // Type of the left-hand side target vector
3130  , typename VT2 // Type of the left-hand side vector operand
3131  , typename MT1 // Type of the right-hand side matrix operand
3132  , typename ST2 > // Type of the scalar value
3133  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3134  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3135  {
3136  typedef IntrinsicTrait<ElementType> IT;
3137 
3138  const size_t M( A.rows() );
3139  const size_t N( A.columns() );
3140 
      // Intrinsic representation of the scalar; every accumulator is multiplied with it before the store.
3141  const IntrinsicType factor( set( scalar ) );
3142 
      // Column index shared by all loops below; each loop continues where the previous one stopped.
3143  size_t j( 0UL );
3144 
      // Chunks of 8 intrinsic vectors (8*IT::size columns per iteration).
3145  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
3146  {
      // Row range for this column chunk: starts at j (j+1 if strictly lower) for lower matrices,
      // ends at min(j+chunk width, M) (minus 1 if strictly upper) for upper matrices.
3147  const size_t ibegin( ( IsLower<MT1>::value )
3148  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3149  :( 0UL ) );
3150  const size_t iend( ( IsUpper<MT1>::value )
3151  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3152  :( M ) );
3153  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3154 
3155  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3156 
3157  for( size_t i=ibegin; i<iend; ++i ) {
3158  const IntrinsicType x1( set( x[i] ) );
3159  xmm1 = xmm1 + x1 * A.load(i,j );
3160  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3161  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3162  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3163  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3164  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3165  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3166  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3167  }
3168 
3169  y.store( j , xmm1*factor );
3170  y.store( j+IT::size , xmm2*factor );
3171  y.store( j+IT::size*2UL, xmm3*factor );
3172  y.store( j+IT::size*3UL, xmm4*factor );
3173  y.store( j+IT::size*4UL, xmm5*factor );
3174  y.store( j+IT::size*5UL, xmm6*factor );
3175  y.store( j+IT::size*6UL, xmm7*factor );
3176  y.store( j+IT::size*7UL, xmm8*factor );
3177  }
3178 
      // Chunks of 4 intrinsic vectors.
3179  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
3180  {
3181  const size_t ibegin( ( IsLower<MT1>::value )
3182  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3183  :( 0UL ) );
3184  const size_t iend( ( IsUpper<MT1>::value )
3185  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3186  :( M ) );
3187  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3188 
3189  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3190 
3191  for( size_t i=ibegin; i<iend; ++i ) {
3192  const IntrinsicType x1( set( x[i] ) );
3193  xmm1 = xmm1 + x1 * A.load(i,j );
3194  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3195  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3196  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3197  }
3198 
3199  y.store( j , xmm1*factor );
3200  y.store( j+IT::size , xmm2*factor );
3201  y.store( j+IT::size*2UL, xmm3*factor );
3202  y.store( j+IT::size*3UL, xmm4*factor );
3203  }
3204 
      // Chunks of 3 intrinsic vectors.
3205  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
3206  {
3207  const size_t ibegin( ( IsLower<MT1>::value )
3208  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3209  :( 0UL ) );
3210  const size_t iend( ( IsUpper<MT1>::value )
3211  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3212  :( M ) );
3213  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3214 
3215  IntrinsicType xmm1, xmm2, xmm3;
3216 
3217  for( size_t i=ibegin; i<iend; ++i ) {
3218  const IntrinsicType x1( set( x[i] ) );
3219  xmm1 = xmm1 + x1 * A.load(i,j );
3220  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3221  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3222  }
3223 
3224  y.store( j , xmm1*factor );
3225  y.store( j+IT::size , xmm2*factor );
3226  y.store( j+IT::size*2UL, xmm3*factor );
3227  }
3228 
      // Chunks of 2 intrinsic vectors.
3229  for( ; (j+IT::size) < N; j+=IT::size*2UL )
3230  {
3231  const size_t ibegin( ( IsLower<MT1>::value )
3232  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3233  :( 0UL ) );
3234  const size_t iend( ( IsUpper<MT1>::value )
3235  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3236  :( M ) );
3237  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3238 
3239  IntrinsicType xmm1, xmm2;
3240 
3241  for( size_t i=ibegin; i<iend; ++i ) {
3242  const IntrinsicType x1( set( x[i] ) );
3243  xmm1 = xmm1 + x1 * A.load(i,j );
3244  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3245  }
3246 
3247  y.store( j , xmm1*factor );
3248  y.store( j+IT::size, xmm2*factor );
3249  }
3250 
      // Remaining columns: a single full-width load/store at column j.
      // NOTE(review): presumably relies on y and A being padded to a multiple of IT::size -- confirm.
3251  if( j < N )
3252  {
3253  const size_t ibegin( ( IsLower<MT1>::value )
3254  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3255  :( 0UL ) );
3256  const size_t iend( ( IsUpper<MT1>::value )
3257  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3258  :( M ) );
3259  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3260 
3261  IntrinsicType xmm1;
3262 
3263  for( size_t i=ibegin; i<iend; ++i ) {
3264  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3265  }
3266 
3267  y.store( j, xmm1*factor );
3268  }
3269  }
3270  //**********************************************************************************************
3271 
3272  //**Default assignment to dense vectors (large matrices)****************************************
// Non-vectorized large-matrix assignment kernel.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not hold (DisableIf); it simply
// forwards all arguments unchanged to selectDefaultAssignKernel().
3286  template< typename VT1 // Type of the left-hand side target vector
3287  , typename VT2 // Type of the left-hand side vector operand
3288  , typename MT1 // Type of the right-hand side matrix operand
3289  , typename ST2 > // Type of the scalar value
3290  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3291  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3292  {
3293  selectDefaultAssignKernel( y, x, A, scalar );
3294  }
3295  //**********************************************************************************************
3296 
3297  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized, blocked large-matrix kernel for the scaled assignment y = ( x * A ) * scalar.
//
// y is reset first. The matrix is then traversed in tiles: column blocks of 'jblock' elements
// (outer loop, jj) and row blocks of 'iblock' rows (inner loop, ii). Within a tile the columns are
// processed in chunks of 8, 4, 3, 2 and 1 intrinsic vector(s); the partial sums of each chunk are
// multiplied by 'factor' and added onto the current content of y (y.load() + ..., y.store()).
// For lower/upper (strictly) triangular matrix types the column range [j,jend) of a tile is
// narrowed at compile time.
// This overload is enabled only if UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector of the assignment
// x      : left-hand side vector operand
// A      : right-hand side matrix operand
// scalar : scaling factor applied to the product
//
// NOTE(review): the accumulators are only default-constructed; this assumes that a
// default-constructed IntrinsicType represents zero -- TODO confirm.
3311  template< typename VT1 // Type of the left-hand side target vector
3312  , typename VT2 // Type of the left-hand side vector operand
3313  , typename MT1 // Type of the right-hand side matrix operand
3314  , typename ST2 > // Type of the scalar value
3315  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3316  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3317  {
3318  typedef IntrinsicTrait<ElementType> IT;
3319 
3320  const size_t M( A.rows() );
3321  const size_t N( A.columns() );
3322 
      // Column block: number of elements fitting into 32768 bytes (presumably an L1 cache size -- confirm).
      // Row block: 8 rows if all columns fit into a single column block, 4 rows otherwise.
3323  const size_t jblock( 32768UL / sizeof( ElementType ) );
3324  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3325 
3326  const IntrinsicType factor( set( scalar ) );
3327 
3328  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
3329 
      // The tiles below accumulate onto y, therefore y has to start from its reset state.
3330  reset( y );
3331 
3332  for( size_t jj=0U; jj<N; jj+=jblock ) {
3333  for( size_t ii=0UL; ii<M; ii+=iblock )
3334  {
3335  const size_t iend( min( ii+iblock, M ) );
3336  const size_t jtmp( min( jj+jblock, N ) );
      // Lower matrices: columns are cut off at iend (iend-1 if strictly lower).
3337  const size_t jend( ( IsLower<MT1>::value )
3338  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3339  :( jtmp ) );
3340 
      // Upper matrices: start at ii (ii+1 if strictly upper), masked with size_t(-IT::size), which
      // rounds down to a multiple of IT::size provided IT::size is a power of two.
3341  size_t j( ( IsUpper<MT1>::value )
3342  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
3343  :( jj ) );
3344 
      // Chunks of 8 intrinsic vectors.
3345  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
3346  {
3347  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3348 
3349  for( size_t i=ii; i<iend; ++i ) {
3350  const IntrinsicType x1( set( x[i] ) );
3351  xmm1 = xmm1 + x1 * A.load(i,j );
3352  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3353  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3354  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3355  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3356  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3357  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3358  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3359  }
3360 
3361  y.store( j , y.load(j ) + xmm1*factor );
3362  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3363  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3364  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3365  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3366  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3367  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3368  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3369  }
3370 
      // Chunks of 4 intrinsic vectors.
3371  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
3372  {
3373  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3374 
3375  for( size_t i=ii; i<iend; ++i ) {
3376  const IntrinsicType x1( set( x[i] ) );
3377  xmm1 = xmm1 + x1 * A.load(i,j );
3378  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3379  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3380  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3381  }
3382 
3383  y.store( j , y.load(j ) + xmm1*factor );
3384  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3385  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3386  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3387  }
3388 
      // Chunks of 3 intrinsic vectors.
3389  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
3390  {
3391  IntrinsicType xmm1, xmm2, xmm3;
3392 
3393  for( size_t i=ii; i<iend; ++i ) {
3394  const IntrinsicType x1( set( x[i] ) );
3395  xmm1 = xmm1 + x1 * A.load(i,j );
3396  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3397  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3398  }
3399 
3400  y.store( j , y.load(j ) + xmm1*factor );
3401  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3402  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3403  }
3404 
      // Chunks of 2 intrinsic vectors.
3405  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
3406  {
3407  IntrinsicType xmm1, xmm2;
3408 
3409  for( size_t i=ii; i<iend; ++i ) {
3410  const IntrinsicType x1( set( x[i] ) );
3411  xmm1 = xmm1 + x1 * A.load(i,j );
3412  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3413  }
3414 
3415  y.store( j , y.load(j ) + xmm1*factor );
3416  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
3417  }
3418 
      // Remaining columns of the tile: a single full-width load/store at column j.
3419  if( j < jend )
3420  {
3421  IntrinsicType xmm1;
3422 
3423  for( size_t i=ii; i<iend; ++i ) {
3424  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3425  }
3426 
3427  y.store( j, y.load(j) + xmm1*factor );
3428  }
3429  }
3430  }
3431  }
3432  //**********************************************************************************************
3433 
3434  //**BLAS-based assignment to dense vectors (default)********************************************
// Default variant of the BLAS-based assignment kernel.
// Enabled if UseDefaultKernel<VT1,VT2,MT1,ST2> holds; no BLAS routine is called here, the
// arguments are forwarded unchanged to selectLargeAssignKernel().
3447  template< typename VT1 // Type of the left-hand side target vector
3448  , typename VT2 // Type of the left-hand side vector operand
3449  , typename MT1 // Type of the right-hand side matrix operand
3450  , typename ST2 > // Type of the scalar value
3451  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3452  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3453  {
3454  selectLargeAssignKernel( y, x, A, scalar );
3455  }
3456  //**********************************************************************************************
3457 
3458  //**BLAS-based assignment to dense vectors (single precision)***********************************
3459 #if BLAZE_BLAS_MODE
3460 
// BLAS-based assignment kernel for single precision operands
// (enabled if UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds).
// Triangular A: y is first assigned scalar * x, then strmv() is applied to y and A with
// CblasLower or CblasUpper depending on IsLower<MT1>.
// Otherwise: sgemv() is called with 'scalar' and 0.0F as the trailing arguments.
// NOTE(review): presumably these are alpha = scalar and beta = 0 (y is overwritten) -- confirm
// against the sgemv wrapper in blaze/math/blas/Level2.h.
3473  template< typename VT1 // Type of the left-hand side target vector
3474  , typename VT2 // Type of the left-hand side vector operand
3475  , typename MT1 // Type of the right-hand side matrix operand
3476  , typename ST2 > // Type of the scalar value
3477  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3478  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3479  {
3480  if( IsTriangular<MT1>::value ) {
3481  assign( y, scalar * x );
3482  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3483  }
3484  else {
3485  sgemv( y, x, A, scalar, 0.0F );
3486  }
3487  }
3488 #endif
3489  //**********************************************************************************************
3490 
3491  //**BLAS-based assignment to dense vectors (double precision)***********************************
3492 #if BLAZE_BLAS_MODE
3493 
// BLAS-based assignment kernel for double precision operands
// (enabled if UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds).
// Triangular A: y is first assigned scalar * x, then dtrmv() is applied to y and A with
// CblasLower or CblasUpper depending on IsLower<MT1>.
// Otherwise: dgemv() is called with 'scalar' and 0.0 as the trailing arguments.
// NOTE(review): presumably these are alpha = scalar and beta = 0 (y is overwritten) -- confirm
// against the dgemv wrapper in blaze/math/blas/Level2.h.
3506  template< typename VT1 // Type of the left-hand side target vector
3507  , typename VT2 // Type of the left-hand side vector operand
3508  , typename MT1 // Type of the right-hand side matrix operand
3509  , typename ST2 > // Type of the scalar value
3510  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3511  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3512  {
3513  if( IsTriangular<MT1>::value ) {
3514  assign( y, scalar * x );
3515  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3516  }
3517  else {
3518  dgemv( y, x, A, scalar, 0.0 );
3519  }
3520  }
3521 #endif
3522  //**********************************************************************************************
3523 
3524  //**BLAS-based assignment to dense vectors (single precision complex)***************************
3525 #if BLAZE_BLAS_MODE
3526 
// BLAS-based assignment kernel for single precision complex operands
// (enabled if UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; the scalar type is not part of
// the condition).
// Triangular A: y is first assigned scalar * x, then ctrmv() is applied to y and A with
// CblasLower or CblasUpper depending on IsLower<MT1>.
// Otherwise: cgemv() is called; the scalar is widened to complex<float>( scalar, 0.0F ) and the
// last argument is complex zero.
// NOTE(review): presumably alpha = scalar and beta = 0 (y is overwritten) -- confirm against the
// cgemv wrapper in blaze/math/blas/Level2.h.
3539  template< typename VT1 // Type of the left-hand side target vector
3540  , typename VT2 // Type of the left-hand side vector operand
3541  , typename MT1 // Type of the right-hand side matrix operand
3542  , typename ST2 > // Type of the scalar value
3543  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3544  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3545  {
3546  if( IsTriangular<MT1>::value ) {
3547  assign( y, scalar * x );
3548  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3549  }
3550  else {
3551  cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3552  }
3553  }
3554 #endif
3555  //**********************************************************************************************
3556 
3557  //**BLAS-based assignment to dense vectors (double precision complex)***************************
3558 #if BLAZE_BLAS_MODE
3559 
// BLAS-based assignment kernel for double precision complex operands
// (enabled if UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; the scalar type is not part of
// the condition).
// Triangular A: y is first assigned scalar * x, then ztrmv() is applied to y and A with
// CblasLower or CblasUpper depending on IsLower<MT1>.
// Otherwise: zgemv() is called; the scalar is widened to complex<double>( scalar, 0.0 ) and the
// last argument is complex zero.
// NOTE(review): presumably alpha = scalar and beta = 0 (y is overwritten) -- confirm against the
// zgemv wrapper in blaze/math/blas/Level2.h.
3572  template< typename VT1 // Type of the left-hand side target vector
3573  , typename VT2 // Type of the left-hand side vector operand
3574  , typename MT1 // Type of the right-hand side matrix operand
3575  , typename ST2 > // Type of the scalar value
3576  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3577  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3578  {
3579  if( IsTriangular<MT1>::value ) {
3580  assign( y, scalar * x );
3581  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3582  }
3583  else {
3584  zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3585  }
3586  }
3587 #endif
3588  //**********************************************************************************************
3589 
3590  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled vector/matrix multiplication expression to a sparse row vector.
// The expression is first evaluated via serial( rhs ) into a temporary of type ResultType, which
// is then assigned to the sparse target.
//
// lhs : target sparse vector; its size must equal rhs.size() (checked by internal assertion)
// rhs : scaled multiplication expression to be assigned
3602  template< typename VT1 > // Type of the target sparse vector
3603  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3604  {
3606 
3610 
3611  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3612 
3613  const ResultType tmp( serial( rhs ) );
3614  assign( ~lhs, tmp );
3615  }
3616  //**********************************************************************************************
3617 
3618  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled vector/matrix multiplication expression to a dense row vector.
// Fetches both operands of the wrapped multiplication (rhs.vector_), returns without touching lhs
// if the matrix has no rows or no columns, evaluates the operands via serial() and dispatches to
// selectAddAssignKernel() together with the scalar rhs.scalar_.
//
// lhs : target dense vector; its size must equal rhs.size() (checked by internal assertion)
// rhs : scaled multiplication expression to be added
3630  template< typename VT1 > // Type of the target dense vector
3631  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3632  {
3634 
3635  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3636 
3637  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3638  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3639 
      // Nothing to add for an empty matrix operand.
3640  if( right.rows() == 0UL || right.columns() == 0UL ) {
3641  return;
3642  }
3643 
3644  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
3645  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3646 
3647  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3648  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3649  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3650  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3651 
3652  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
3653  }
3654  //**********************************************************************************************
3655 
3656  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment y += ( x * A ) * scalar.
// The small-matrix kernel is used if at least one of the following holds:
//  - MT1 is a diagonal matrix type,
//  - the matrix operand type MT of the enclosing expression is a computation and
//    'evaluateMatrix' is false,
//  - the number of matrix elements ( A.rows() * A.columns() ) is below TDVECDMATMULT_THRESHOLD.
// In all other cases the BLAS-based kernel is used.
3667  template< typename VT1 // Type of the left-hand side target vector
3668  , typename VT2 // Type of the left-hand side vector operand
3669  , typename MT1 // Type of the right-hand side matrix operand
3670  , typename ST2 > // Type of the scalar value
3671  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3672  {
3673  if( ( IsDiagonal<MT1>::value ) ||
3674  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3675  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3676  selectSmallAddAssignKernel( y, x, A, scalar );
3677  else
3678  selectBlasAddAssignKernel( y, x, A, scalar );
3679  }
3680  //**********************************************************************************************
3681 
3682  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) kernel for the addition assignment.
// Builds the expression x * A * scalar and hands it to the addAssign() member function of the
// target vector y.
3696  template< typename VT1 // Type of the left-hand side target vector
3697  , typename VT2 // Type of the left-hand side vector operand
3698  , typename MT1 // Type of the right-hand side matrix operand
3699  , typename ST2 > // Type of the scalar value
3700  static inline void selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3701  {
3702  y.addAssign( x * A * scalar );
3703  }
3704  //**********************************************************************************************
3705 
3706  //**Default addition assignment to dense vectors (small matrices)*******************************
// Non-vectorized small-matrix addition assignment kernel.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not hold (DisableIf); it simply
// forwards all arguments unchanged to selectDefaultAddAssignKernel().
3720  template< typename VT1 // Type of the left-hand side target vector
3721  , typename VT2 // Type of the left-hand side vector operand
3722  , typename MT1 // Type of the right-hand side matrix operand
3723  , typename ST2 > // Type of the scalar value
3724  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3725  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3726  {
3727  selectDefaultAddAssignKernel( y, x, A, scalar );
3728  }
3729  //**********************************************************************************************
3730 
3731  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized small-matrix kernel for the scaled addition assignment y += ( x * A ) * scalar.
//
// The columns of A are processed in chunks of 8, 4, 3, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For every chunk the products x[i] * A(i,j...) are accumulated over the rows
// [ibegin,iend), multiplied by 'factor' and added to the current content of y
// (y.load() + ..., written back via y.store()). For lower/upper (strictly) triangular matrix
// types the row range is narrowed at compile time.
// This overload is enabled only if UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector of the addition assignment
// x      : left-hand side vector operand
// A      : right-hand side matrix operand
// scalar : scaling factor applied to the product
//
// NOTE(review): the accumulators are only default-constructed; this assumes that a
// default-constructed IntrinsicType represents zero -- TODO confirm.
3746  template< typename VT1 // Type of the left-hand side target vector
3747  , typename VT2 // Type of the left-hand side vector operand
3748  , typename MT1 // Type of the right-hand side matrix operand
3749  , typename ST2 > // Type of the scalar value
3750  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3751  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3752  {
3753  typedef IntrinsicTrait<ElementType> IT;
3754 
3755  const size_t M( A.rows() );
3756  const size_t N( A.columns() );
3757 
      // Intrinsic representation of the scalar; every accumulator is multiplied with it before the update of y.
3758  const IntrinsicType factor( set( scalar ) );
3759 
      // Column index shared by all loops below; each loop continues where the previous one stopped.
3760  size_t j( 0UL );
3761 
      // Chunks of 8 intrinsic vectors (8*IT::size columns per iteration).
3762  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
3763  {
      // Row range for this column chunk: starts at j (j+1 if strictly lower) for lower matrices,
      // ends at min(j+chunk width, M) (minus 1 if strictly upper) for upper matrices.
3764  const size_t ibegin( ( IsLower<MT1>::value )
3765  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3766  :( 0UL ) );
3767  const size_t iend( ( IsUpper<MT1>::value )
3768  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3769  :( M ) );
3770  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3771 
3772  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3773 
3774  for( size_t i=ibegin; i<iend; ++i ) {
3775  const IntrinsicType x1( set( x[i] ) );
3776  xmm1 = xmm1 + x1 * A.load(i,j );
3777  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3778  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3779  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3780  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3781  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3782  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3783  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3784  }
3785 
3786  y.store( j , y.load(j ) + xmm1*factor );
3787  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3788  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3789  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3790  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3791  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3792  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3793  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3794  }
3795 
      // Chunks of 4 intrinsic vectors.
3796  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
3797  {
3798  const size_t ibegin( ( IsLower<MT1>::value )
3799  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3800  :( 0UL ) );
3801  const size_t iend( ( IsUpper<MT1>::value )
3802  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3803  :( M ) );
3804  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3805 
3806  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3807 
3808  for( size_t i=ibegin; i<iend; ++i ) {
3809  const IntrinsicType x1( set( x[i] ) );
3810  xmm1 = xmm1 + x1 * A.load(i,j );
3811  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3812  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3813  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3814  }
3815 
3816  y.store( j , y.load(j ) + xmm1*factor );
3817  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3818  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3819  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3820  }
3821 
      // Chunks of 3 intrinsic vectors.
3822  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
3823  {
3824  const size_t ibegin( ( IsLower<MT1>::value )
3825  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3826  :( 0UL ) );
3827  const size_t iend( ( IsUpper<MT1>::value )
3828  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3829  :( M ) );
3830  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3831 
3832  IntrinsicType xmm1, xmm2, xmm3;
3833 
3834  for( size_t i=ibegin; i<iend; ++i ) {
3835  const IntrinsicType x1( set( x[i] ) );
3836  xmm1 = xmm1 + x1 * A.load(i,j );
3837  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3838  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3839  }
3840 
3841  y.store( j , y.load(j ) + xmm1*factor );
3842  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3843  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3844  }
3845 
      // Chunks of 2 intrinsic vectors.
3846  for( ; (j+IT::size) < N; j+=IT::size*2UL )
3847  {
3848  const size_t ibegin( ( IsLower<MT1>::value )
3849  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3850  :( 0UL ) );
3851  const size_t iend( ( IsUpper<MT1>::value )
3852  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3853  :( M ) );
3854  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3855 
3856  IntrinsicType xmm1, xmm2;
3857 
3858  for( size_t i=ibegin; i<iend; ++i ) {
3859  const IntrinsicType x1( set( x[i] ) );
3860  xmm1 = xmm1 + x1 * A.load(i,j );
3861  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3862  }
3863 
3864  y.store( j , y.load(j ) + xmm1*factor );
3865  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
3866  }
3867 
      // Remaining columns: a single full-width load/store at column j.
      // NOTE(review): presumably relies on y and A being padded to a multiple of IT::size -- confirm.
3868  if( j < N )
3869  {
3870  const size_t ibegin( ( IsLower<MT1>::value )
3871  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3872  :( 0UL ) );
3873  const size_t iend( ( IsUpper<MT1>::value )
3874  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3875  :( M ) );
3876  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3877 
3878  IntrinsicType xmm1;
3879 
3880  for( size_t i=ibegin; i<iend; ++i ) {
3881  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3882  }
3883 
3884  y.store( j, y.load(j) + xmm1*factor );
3885  }
3886  }
3887  //**********************************************************************************************
3888 
3889  //**Default addition assignment to dense vectors (large matrices)*******************************
// Non-vectorized large-matrix addition assignment kernel.
// Selected when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> does not hold (DisableIf); it simply
// forwards all arguments unchanged to selectDefaultAddAssignKernel().
3903  template< typename VT1 // Type of the left-hand side target vector
3904  , typename VT2 // Type of the left-hand side vector operand
3905  , typename MT1 // Type of the right-hand side matrix operand
3906  , typename ST2 > // Type of the scalar value
3907  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3908  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3909  {
3910  selectDefaultAddAssignKernel( y, x, A, scalar );
3911  }
3912  //**********************************************************************************************
3913 
3914  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized, blocked large-matrix kernel for the scaled addition assignment
// y += ( x * A ) * scalar.
//
// The matrix is traversed in tiles: column blocks of 'jblock' elements (outer loop, jj) and row
// blocks of 'iblock' rows (inner loop, ii). Within a tile the columns are processed in chunks of
// 8, 4, 3, 2 and 1 intrinsic vector(s); the partial sums of each chunk are multiplied by 'factor'
// and added onto the current content of y (y.load() + ..., y.store()). Unlike the plain
// assignment kernel, y is not reset beforehand. For lower/upper (strictly) triangular matrix types
// the column range [j,jend) of a tile is narrowed at compile time.
// This overload is enabled only if UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector of the addition assignment
// x      : left-hand side vector operand
// A      : right-hand side matrix operand
// scalar : scaling factor applied to the product
//
// NOTE(review): the accumulators are only default-constructed; this assumes that a
// default-constructed IntrinsicType represents zero -- TODO confirm.
3929  template< typename VT1 // Type of the left-hand side target vector
3930  , typename VT2 // Type of the left-hand side vector operand
3931  , typename MT1 // Type of the right-hand side matrix operand
3932  , typename ST2 > // Type of the scalar value
3933  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3934  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3935  {
3936  typedef IntrinsicTrait<ElementType> IT;
3937 
3938  const size_t M( A.rows() );
3939  const size_t N( A.columns() );
3940 
      // Column block: number of elements fitting into 32768 bytes (presumably an L1 cache size -- confirm).
      // Row block: 8 rows if all columns fit into a single column block, 4 rows otherwise.
3941  const size_t jblock( 32768UL / sizeof( ElementType ) );
3942  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3943 
3944  const IntrinsicType factor( set( scalar ) );
3945 
3946  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
3947 
3948  for( size_t jj=0U; jj<N; jj+=jblock ) {
3949  for( size_t ii=0UL; ii<M; ii+=iblock )
3950  {
3951  const size_t iend( min( ii+iblock, M ) );
3952  const size_t jtmp( min( jj+jblock, N ) );
      // Lower matrices: columns are cut off at iend (iend-1 if strictly lower).
3953  const size_t jend( ( IsLower<MT1>::value )
3954  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3955  :( jtmp ) );
3956 
      // Upper matrices: start at ii (ii+1 if strictly upper), masked with size_t(-IT::size), which
      // rounds down to a multiple of IT::size provided IT::size is a power of two.
3957  size_t j( ( IsUpper<MT1>::value )
3958  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
3959  :( jj ) );
3960 
      // Chunks of 8 intrinsic vectors.
3961  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
3962  {
3963  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3964 
3965  for( size_t i=ii; i<iend; ++i ) {
3966  const IntrinsicType x1( set( x[i] ) );
3967  xmm1 = xmm1 + x1 * A.load(i,j );
3968  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3969  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3970  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3971  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3972  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3973  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3974  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3975  }
3976 
3977  y.store( j , y.load(j ) + xmm1*factor );
3978  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3979  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3980  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3981  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3982  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3983  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3984  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3985  }
3986 
      // Chunks of 4 intrinsic vectors.
3987  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
3988  {
3989  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3990 
3991  for( size_t i=ii; i<iend; ++i ) {
3992  const IntrinsicType x1( set( x[i] ) );
3993  xmm1 = xmm1 + x1 * A.load(i,j );
3994  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3995  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3996  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3997  }
3998 
3999  y.store( j , y.load(j ) + xmm1*factor );
4000  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
4001  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
4002  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
4003  }
4004 
      // Chunks of 3 intrinsic vectors.
4005  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
4006  {
4007  IntrinsicType xmm1, xmm2, xmm3;
4008 
4009  for( size_t i=ii; i<iend; ++i ) {
4010  const IntrinsicType x1( set( x[i] ) );
4011  xmm1 = xmm1 + x1 * A.load(i,j );
4012  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4013  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4014  }
4015 
4016  y.store( j , y.load(j ) + xmm1*factor );
4017  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
4018  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
4019  }
4020 
      // Chunks of 2 intrinsic vectors.
4021  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
4022  {
4023  IntrinsicType xmm1, xmm2;
4024 
4025  for( size_t i=ii; i<iend; ++i ) {
4026  const IntrinsicType x1( set( x[i] ) );
4027  xmm1 = xmm1 + x1 * A.load(i,j );
4028  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
4029  }
4030 
4031  y.store( j , y.load(j ) + xmm1*factor );
4032  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
4033  }
4034 
      // Remaining columns of the tile: a single full-width load/store at column j.
4035  if( j < jend )
4036  {
4037  IntrinsicType xmm1;
4038 
4039  for( size_t i=ii; i<iend; ++i ) {
4040  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
4041  }
4042 
4043  y.store( j, y.load(j) + xmm1*factor );
4044  }
4045  }
4046  }
4047  }
4048  //**********************************************************************************************
4049 
4050  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Default variant of the BLAS-based addition assignment kernel.
// Enabled if UseDefaultKernel<VT1,VT2,MT1,ST2> holds; no BLAS routine is called here, the
// arguments are forwarded unchanged to selectLargeAddAssignKernel().
4064  template< typename VT1 // Type of the left-hand side target vector
4065  , typename VT2 // Type of the left-hand side vector operand
4066  , typename MT1 // Type of the right-hand side matrix operand
4067  , typename ST2 > // Type of the scalar value
4068  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4069  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4070  {
4071  selectLargeAddAssignKernel( y, x, A, scalar );
4072  }
4073  //**********************************************************************************************
4074 
4075  //**BLAS-based addition assignment to dense vectors (single precision)**************************
4076 #if BLAZE_BLAS_MODE
4077 
// BLAS-based addition assignment kernel for single precision operands
// (enabled if UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds).
// Triangular A: a temporary of type VT1::ResultType is initialized with scalar * x, strmv() is
// applied to the temporary and A (CblasLower or CblasUpper depending on IsLower<MT1>), and the
// temporary is then added to y via addAssign().
// Otherwise: sgemv() is called with 'scalar' and 1.0F as the trailing arguments.
// NOTE(review): presumably these are alpha = scalar and beta = 1 (the result is accumulated onto
// y) -- confirm against the sgemv wrapper in blaze/math/blas/Level2.h.
4090  template< typename VT1 // Type of the left-hand side target vector
4091  , typename VT2 // Type of the left-hand side vector operand
4092  , typename MT1 // Type of the right-hand side matrix operand
4093  , typename ST2 > // Type of the scalar value
4094  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4095  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4096  {
4097  if( IsTriangular<MT1>::value ) {
4098  typename VT1::ResultType tmp( scalar * x );
4099  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4100  addAssign( y, tmp );
4101  }
4102  else {
4103  sgemv( y, x, A, scalar, 1.0F );
4104  }
4105  }
4106 #endif
4107  //**********************************************************************************************
4108 
4109  //**BLAS-based addition assignment to dense vectors (double precision)**************************
4110 #if BLAZE_BLAS_MODE
4111 
// BLAS-based addition assignment kernel for double precision operands
// (enabled if UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds).
// Triangular A: a temporary of type VT1::ResultType is initialized with scalar * x, dtrmv() is
// applied to the temporary and A (CblasLower or CblasUpper depending on IsLower<MT1>), and the
// temporary is then added to y via addAssign().
// Otherwise: dgemv() is called with 'scalar' and 1.0 as the trailing arguments.
// NOTE(review): presumably these are alpha = scalar and beta = 1 (the result is accumulated onto
// y) -- confirm against the dgemv wrapper in blaze/math/blas/Level2.h.
4124  template< typename VT1 // Type of the left-hand side target vector
4125  , typename VT2 // Type of the left-hand side vector operand
4126  , typename MT1 // Type of the right-hand side matrix operand
4127  , typename ST2 > // Type of the scalar value
4128  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4129  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4130  {
4131  if( IsTriangular<MT1>::value ) {
4132  typename VT1::ResultType tmp( scalar * x );
4133  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4134  addAssign( y, tmp );
4135  }
4136  else {
4137  dgemv( y, x, A, scalar, 1.0 );
4138  }
4139  }
4140 #endif
4141  //**********************************************************************************************
4142 
4143  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
4144 #if BLAZE_BLAS_MODE
4145 
// BLAS-based addition assignment kernel for single precision complex operands
// (enabled if UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; the scalar type is not part of
// the condition).
// Triangular A: a temporary of type VT1::ResultType is initialized with scalar * x, ctrmv() is
// applied to the temporary and A (CblasLower or CblasUpper depending on IsLower<MT1>), and the
// temporary is then added to y via addAssign().
// Otherwise: cgemv() is called; the scalar is widened to complex<float>( scalar, 0.0F ) and the
// last argument is complex<float>( 1.0F, 0.0F ).
// NOTE(review): presumably alpha = scalar and beta = 1 (the result is accumulated onto y) --
// confirm against the cgemv wrapper in blaze/math/blas/Level2.h.
4158  template< typename VT1 // Type of the left-hand side target vector
4159  , typename VT2 // Type of the left-hand side vector operand
4160  , typename MT1 // Type of the right-hand side matrix operand
4161  , typename ST2 > // Type of the scalar value
4162  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4163  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4164  {
4165  if( IsTriangular<MT1>::value ) {
4166  typename VT1::ResultType tmp( scalar * x );
4167  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4168  addAssign( y, tmp );
4169  }
4170  else {
4171  cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4172  }
4173  }
4174 #endif
4175  //**********************************************************************************************
4176 
4177  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
4178 #if BLAZE_BLAS_MODE
4179 
// BLAS-based addition assignment kernel for double precision complex operands
// (enabled if UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; the scalar type is not part of
// the condition).
// Triangular A: a temporary of type VT1::ResultType is initialized with scalar * x, ztrmv() is
// applied to the temporary and A (CblasLower or CblasUpper depending on IsLower<MT1>), and the
// temporary is then added to y via addAssign().
// Otherwise: zgemv() is called; the scalar is widened to complex<double>( scalar, 0.0 ) and the
// last argument is complex<double>( 1.0, 0.0 ).
// NOTE(review): presumably alpha = scalar and beta = 1 (the result is accumulated onto y) --
// confirm against the zgemv wrapper in blaze/math/blas/Level2.h.
4192  template< typename VT1 // Type of the left-hand side target vector
4193  , typename VT2 // Type of the left-hand side vector operand
4194  , typename MT1 // Type of the right-hand side matrix operand
4195  , typename ST2 > // Type of the scalar value
4196  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4197  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4198  {
4199  if( IsTriangular<MT1>::value ) {
4200  typename VT1::ResultType tmp( scalar * x );
4201  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4202  addAssign( y, tmp );
4203  }
4204  else {
4205  zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4206  }
4207  }
4208 #endif
4209  //**********************************************************************************************
4210 
4211  //**Addition assignment to sparse vectors*******************************************************
4212  // No special implementation for the addition assignment to sparse vectors.
4213  //**********************************************************************************************
4214 
4215  //**Subtraction assignment to dense vectors*****************************************************
4227  template< typename VT1 > // Type of the target dense vector
4228  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4229  {
4231 
4232  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4233 
4234  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4235  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4236 
4237  if( right.rows() == 0UL || right.columns() == 0UL ) {
4238  return;
4239  }
4240 
4241  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
4242  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4243 
4244  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4245  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4246  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4247  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4248 
4249  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
4250  }
4251  //**********************************************************************************************
4252 
4253  //**Subtraction assignment to dense vectors (kernel selection)**********************************
4264  template< typename VT1 // Type of the left-hand side target vector
4265  , typename VT2 // Type of the left-hand side vector operand
4266  , typename MT1 // Type of the right-hand side matrix operand
4267  , typename ST2 > // Type of the scalar value
4268  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4269  {
4270  if( ( IsDiagonal<MT1>::value ) ||
4271  ( IsComputation<MT>::value && !evaluateMatrix ) ||
4272  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
4273  selectSmallSubAssignKernel( y, x, A, scalar );
4274  else
4275  selectBlasSubAssignKernel( y, x, A, scalar );
4276  }
4277  //**********************************************************************************************
4278 
4279  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default (non-vectorized, non-BLAS) subtraction assignment kernel.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Rebuilds the expression x * A * scalar and hands it to the subAssign() member of the
// target vector.
*/
template< typename VT1, typename VT2, typename MT1, typename ST2 >
static inline void selectDefaultSubAssignKernel( VT1& y,
                                                 const VT2& x,
                                                 const MT1& A,
                                                 ST2 scalar )
{
   y.subAssign( x * A * scalar );
}
4301  //**********************************************************************************************
4302 
4303  //**Default subtraction assignment to dense vectors (small matrices)****************************
4317  template< typename VT1 // Type of the left-hand side target vector
4318  , typename VT2 // Type of the left-hand side vector operand
4319  , typename MT1 // Type of the right-hand side matrix operand
4320  , typename ST2 > // Type of the scalar value
4321  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4322  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4323  {
4324  selectDefaultSubAssignKernel( y, x, A, scalar );
4325  }
4326  //**********************************************************************************************
4327 
4328  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized small-matrix kernel for the subtraction assignment y -= ( x * A ) * scalar.
//
// Enabled via EnableIf when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//   y      : target vector, updated through y.load()/y.store()
//   x      : left-hand side vector operand, read element-wise via x[i]
//   A      : right-hand side matrix operand, read via A.load(i,j)
//   scalar : scaling factor, broadcast once into 'factor'
//
// The columns of A are processed in groups of 8, 4, 3, 2 and finally 1 SIMD vector(s) of
// IT::size elements. For every group the products x[i] * A(i,j..) are accumulated over the
// rows [ibegin,iend) and the accumulated sums, multiplied by 'factor', are subtracted from y.
// ibegin/iend narrow the row range for (strictly) lower and (strictly) upper matrices.
//
// NOTE(review): the xmm accumulators are default-constructed and then read; this presumably
// relies on IntrinsicType zero-initializing itself - confirm.
// NOTE(review): the last group loads a full SIMD vector at column j even if fewer than
// IT::size columns remain; presumably A and y are padded accordingly - confirm.
4343  template< typename VT1 // Type of the left-hand side target vector
4344  , typename VT2 // Type of the left-hand side vector operand
4345  , typename MT1 // Type of the right-hand side matrix operand
4346  , typename ST2 > // Type of the scalar value
4347  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4348  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4349  {
4350  typedef IntrinsicTrait<ElementType> IT;
4351 
4352  const size_t M( A.rows() );
4353  const size_t N( A.columns() );
4354 
// Broadcast of the scaling factor; applied once per column group after the accumulation
4355  const IntrinsicType factor( set( scalar ) );
4356 
4357  size_t j( 0UL );
4358 
// Column groups of 8 SIMD vectors
4359  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL )
4360  {
4361  const size_t ibegin( ( IsLower<MT1>::value )
4362  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4363  :( 0UL ) );
4364  const size_t iend( ( IsUpper<MT1>::value )
4365  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4366  :( M ) );
4367  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4368 
4369  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4370 
4371  for( size_t i=ibegin; i<iend; ++i ) {
4372  const IntrinsicType x1( set( x[i] ) );
4373  xmm1 = xmm1 + x1 * A.load(i,j );
4374  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4375  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4376  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4377  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
4378  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
4379  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
4380  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
4381  }
4382 
4383  y.store( j , y.load(j ) - xmm1*factor );
4384  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4385  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4386  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4387  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
4388  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
4389  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
4390  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
4391  }
4392 
// Column groups of 4 SIMD vectors
4393  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
4394  {
4395  const size_t ibegin( ( IsLower<MT1>::value )
4396  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4397  :( 0UL ) );
4398  const size_t iend( ( IsUpper<MT1>::value )
4399  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4400  :( M ) );
4401  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4402 
4403  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4404 
4405  for( size_t i=ibegin; i<iend; ++i ) {
4406  const IntrinsicType x1( set( x[i] ) );
4407  xmm1 = xmm1 + x1 * A.load(i,j );
4408  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4409  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4410  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4411  }
4412 
4413  y.store( j , y.load(j ) - xmm1*factor );
4414  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4415  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4416  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4417  }
4418 
// Column groups of 3 SIMD vectors
4419  for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL )
4420  {
4421  const size_t ibegin( ( IsLower<MT1>::value )
4422  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4423  :( 0UL ) );
4424  const size_t iend( ( IsUpper<MT1>::value )
4425  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4426  :( M ) );
4427  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4428 
4429  IntrinsicType xmm1, xmm2, xmm3;
4430 
4431  for( size_t i=ibegin; i<iend; ++i ) {
4432  const IntrinsicType x1( set( x[i] ) );
4433  xmm1 = xmm1 + x1 * A.load(i,j );
4434  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4435  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4436  }
4437 
4438  y.store( j , y.load(j ) - xmm1*factor );
4439  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4440  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4441  }
4442 
// Column groups of 2 SIMD vectors
4443  for( ; (j+IT::size) < N; j+=IT::size*2UL )
4444  {
4445  const size_t ibegin( ( IsLower<MT1>::value )
4446  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4447  :( 0UL ) );
4448  const size_t iend( ( IsUpper<MT1>::value )
4449  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4450  :( M ) );
4451  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4452 
4453  IntrinsicType xmm1, xmm2;
4454 
4455  for( size_t i=ibegin; i<iend; ++i ) {
4456  const IntrinsicType x1( set( x[i] ) );
4457  xmm1 = xmm1 + x1 * A.load(i,j );
4458  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
4459  }
4460 
4461  y.store( j , y.load(j ) - xmm1*factor );
4462  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
4463  }
4464 
// Remaining single SIMD vector of columns (at most one, since the loop above stops
// only when j+IT::size >= N)
4465  if( j < N )
4466  {
4467  const size_t ibegin( ( IsLower<MT1>::value )
4468  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4469  :( 0UL ) );
4470  const size_t iend( ( IsUpper<MT1>::value )
4471  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4472  :( M ) );
4473  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4474 
4475  IntrinsicType xmm1;
4476 
4477  for( size_t i=ibegin; i<iend; ++i ) {
4478  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
4479  }
4480 
4481  y.store( j, y.load(j) - xmm1*factor );
4482  }
4483  }
4484  //**********************************************************************************************
4485 
4486  //**Default subtraction assignment to dense vectors (large matrices)****************************
4500  template< typename VT1 // Type of the left-hand side target vector
4501  , typename VT2 // Type of the left-hand side vector operand
4502  , typename MT1 // Type of the right-hand side matrix operand
4503  , typename ST2 > // Type of the scalar value
4504  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4505  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4506  {
4507  selectDefaultSubAssignKernel( y, x, A, scalar );
4508  }
4509  //**********************************************************************************************
4510 
4511  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized large-matrix kernel for the subtraction assignment y -= ( x * A ) * scalar.
//
// Enabled via EnableIf when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//   y      : target vector, updated through y.load()/y.store()
//   x      : left-hand side vector operand, read element-wise via x[i]
//   A      : right-hand side matrix operand, read via A.load(i,j)
//   scalar : scaling factor, broadcast once into 'factor'
//
// The matrix is traversed in blocks: columns in steps of jblock (32768 / sizeof(ElementType)
// elements) and rows in steps of iblock (8 rows if N < jblock, otherwise 4). Within a block
// the columns are handled in groups of 8, 4, 3, 2 and 1 SIMD vector(s). In contrast to a
// single pass over all rows, y is updated once per row block, i.e. each (ii,jj) block
// subtracts its partial product from y.
//
// NOTE(review): the xmm accumulators are default-constructed and then read; this presumably
// relies on IntrinsicType zero-initializing itself - confirm.
// NOTE(review): the trailing group loads a full SIMD vector even if fewer than IT::size
// columns remain before jend; presumably A and y are padded accordingly - confirm.
4526  template< typename VT1 // Type of the left-hand side target vector
4527  , typename VT2 // Type of the left-hand side vector operand
4528  , typename MT1 // Type of the right-hand side matrix operand
4529  , typename ST2 > // Type of the scalar value
4530  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4531  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4532  {
4533  typedef IntrinsicTrait<ElementType> IT;
4534 
4535  const size_t M( A.rows() );
4536  const size_t N( A.columns() );
4537 
// Block sizes: jblock columns per column block, iblock rows per row block
4538  const size_t jblock( 32768UL / sizeof( ElementType ) );
4539  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
4540 
4541  const IntrinsicType factor( set( scalar ) );
4542 
4543  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
4544 
4545  for( size_t jj=0U; jj<N; jj+=jblock ) {
4546  for( size_t ii=0UL; ii<M; ii+=iblock )
4547  {
4548  const size_t iend( min( ii+iblock, M ) );
4549  const size_t jtmp( min( jj+jblock, N ) );
// For (strictly) lower matrices the column range of this row block ends at iend (iend-1)
4550  const size_t jend( ( IsLower<MT1>::value )
4551  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
4552  :( jtmp ) );
4553 
// For (strictly) upper matrices the start column is ii (ii+1) masked with size_t(-IT::size);
// NOTE(review): the mask rounds down to a multiple of IT::size only if IT::size is a power
// of two - confirm.
4554  size_t j( ( IsUpper<MT1>::value )
4555  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
4556  :( jj ) );
4557 
// Column groups of 8 SIMD vectors
4558  for( ; (j+IT::size*7UL) < jend; j+=IT::size*8UL )
4559  {
4560  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4561 
4562  for( size_t i=ii; i<iend; ++i ) {
4563  const IntrinsicType x1( set( x[i] ) );
4564  xmm1 = xmm1 + x1 * A.load(i,j );
4565  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4566  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4567  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4568  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
4569  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
4570  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
4571  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
4572  }
4573 
4574  y.store( j , y.load(j ) - xmm1*factor );
4575  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4576  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4577  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4578  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
4579  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
4580  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
4581  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
4582  }
4583 
// Column groups of 4 SIMD vectors
4584  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
4585  {
4586  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4587 
4588  for( size_t i=ii; i<iend; ++i ) {
4589  const IntrinsicType x1( set( x[i] ) );
4590  xmm1 = xmm1 + x1 * A.load(i,j );
4591  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4592  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4593  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4594  }
4595 
4596  y.store( j , y.load(j ) - xmm1*factor );
4597  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4598  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4599  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4600  }
4601 
// Column groups of 3 SIMD vectors
4602  for( ; (j+IT::size*2UL) < jend; j+=IT::size*3UL )
4603  {
4604  IntrinsicType xmm1, xmm2, xmm3;
4605 
4606  for( size_t i=ii; i<iend; ++i ) {
4607  const IntrinsicType x1( set( x[i] ) );
4608  xmm1 = xmm1 + x1 * A.load(i,j );
4609  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4610  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4611  }
4612 
4613  y.store( j , y.load(j ) - xmm1*factor );
4614  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4615  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4616  }
4617 
// Column groups of 2 SIMD vectors
4618  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
4619  {
4620  IntrinsicType xmm1, xmm2;
4621 
4622  for( size_t i=ii; i<iend; ++i ) {
4623  const IntrinsicType x1( set( x[i] ) );
4624  xmm1 = xmm1 + x1 * A.load(i,j );
4625  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
4626  }
4627 
4628  y.store( j , y.load(j ) - xmm1*factor );
4629  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
4630  }
4631 
// Remaining single SIMD vector of columns within this block
4632  if( j < jend )
4633  {
4634  IntrinsicType xmm1;
4635 
4636  for( size_t i=ii; i<iend; ++i ) {
4637  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
4638  }
4639 
4640  y.store( j, y.load(j) - xmm1*factor );
4641  }
4642  }
4643  }
4644  }
4645  //**********************************************************************************************
4646 
4647  //**BLAS-based subtraction assignment to dense vectors (default)********************************
4661  template< typename VT1 // Type of the left-hand side target vector
4662  , typename VT2 // Type of the left-hand side vector operand
4663  , typename MT1 // Type of the right-hand side matrix operand
4664  , typename ST2 > // Type of the scalar value
4665  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4666  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4667  {
4668  selectLargeSubAssignKernel( y, x, A, scalar );
4669  }
4670  //**********************************************************************************************
4671 
4672  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
4673 #if BLAZE_BLAS_MODE
4674 
4687  template< typename VT1 // Type of the left-hand side target vector
4688  , typename VT2 // Type of the left-hand side vector operand
4689  , typename MT1 // Type of the right-hand side matrix operand
4690  , typename ST2 > // Type of the scalar value
4691  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4692  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4693  {
4694  if( IsTriangular<MT1>::value ) {
4695  typename VT1::ResultType tmp( scalar * x );
4696  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4697  subAssign( y, tmp );
4698  }
4699  else {
4700  sgemv( y, x, A, -scalar, 1.0F );
4701  }
4702  }
4703 #endif
4704  //**********************************************************************************************
4705 
4706  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
4707 #if BLAZE_BLAS_MODE
4708 
4721  template< typename VT1 // Type of the left-hand side target vector
4722  , typename VT2 // Type of the left-hand side vector operand
4723  , typename MT1 // Type of the right-hand side matrix operand
4724  , typename ST2 > // Type of the scalar value
4725  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4726  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4727  {
4728  if( IsTriangular<MT1>::value ) {
4729  typename VT1::ResultType tmp( scalar * x );
4730  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4731  subAssign( y, tmp );
4732  }
4733  else {
4734  dgemv( y, x, A, -scalar, 1.0 );
4735  }
4736  }
4737 #endif
4738  //**********************************************************************************************
4739 
4740  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
4741 #if BLAZE_BLAS_MODE
4742 
4755  template< typename VT1 // Type of the left-hand side target vector
4756  , typename VT2 // Type of the left-hand side vector operand
4757  , typename MT1 // Type of the right-hand side matrix operand
4758  , typename ST2 > // Type of the scalar value
4759  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4760  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4761  {
4762  if( IsTriangular<MT1>::value ) {
4763  typename VT1::ResultType tmp( scalar * x );
4764  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4765  subAssign( y, tmp );
4766  }
4767  else {
4768  cgemv( y, x, A, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4769  }
4770  }
4771 #endif
4772  //**********************************************************************************************
4773 
4774  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
4775 #if BLAZE_BLAS_MODE
4776 
4789  template< typename VT1 // Type of the left-hand side target vector
4790  , typename VT2 // Type of the left-hand side vector operand
4791  , typename MT1 // Type of the right-hand side matrix operand
4792  , typename ST2 > // Type of the scalar value
4793  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4794  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4795  {
4796  if( IsTriangular<MT1>::value ) {
4797  typename VT1::ResultType tmp( scalar * x );
4798  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4799  subAssign( y, tmp );
4800  }
4801  else {
4802  zgemv( y, x, A, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4803  }
4804  }
4805 #endif
4806  //**********************************************************************************************
4807 
4808  //**Subtraction assignment to sparse vectors****************************************************
4809  // No special implementation for the subtraction assignment to sparse vectors.
4810  //**********************************************************************************************
4811 
4812  //**Multiplication assignment to dense vectors**************************************************
4824  template< typename VT1 > // Type of the target dense vector
4825  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4826  {
4828 
4832 
4833  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4834 
4835  const ResultType tmp( serial( rhs ) );
4836  multAssign( ~lhs, tmp );
4837  }
4838  //**********************************************************************************************
4839 
4840  //**Multiplication assignment to sparse vectors*************************************************
4841  // No special implementation for the multiplication assignment to sparse vectors.
4842  //**********************************************************************************************
4843 
4844  //**SMP assignment to dense vectors*************************************************************
4858  template< typename VT1 > // Type of the target dense vector
4859  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4860  smpAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4861  {
4863 
4864  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4865 
4866  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4867  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4868 
4869  if( right.rows() == 0UL ) {
4870  reset( ~lhs );
4871  return;
4872  }
4873  else if( right.columns() == 0UL ) {
4874  return;
4875  }
4876 
4877  LT x( left ); // Evaluation of the left-hand side dense vector operand
4878  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4879 
4880  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4881  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4882  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4883  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4884 
4885  smpAssign( ~lhs, x * A * rhs.scalar_ );
4886  }
4887  //**********************************************************************************************
4888 
4889  //**SMP assignment to sparse vectors************************************************************
4903  template< typename VT1 > // Type of the target sparse vector
4904  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4905  smpAssign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4906  {
4908 
4912 
4913  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4914 
4915  const ResultType tmp( rhs );
4916  smpAssign( ~lhs, tmp );
4917  }
4918  //**********************************************************************************************
4919 
4920  //**SMP addition assignment to dense vectors****************************************************
4934  template< typename VT1 > // Type of the target dense vector
4935  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4936  smpAddAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4937  {
4939 
4940  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4941 
4942  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4943  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4944 
4945  if( right.rows() == 0UL || right.columns() == 0UL ) {
4946  return;
4947  }
4948 
4949  LT x( left ); // Evaluation of the left-hand side dense vector operand
4950  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4951 
4952  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4953  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4954  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4955  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4956 
4957  smpAddAssign( ~lhs, x * A * rhs.scalar_ );
4958  }
4959  //**********************************************************************************************
4960 
4961  //**SMP addition assignment to sparse vectors***************************************************
4962  // No special implementation for the SMP addition assignment to sparse vectors.
4963  //**********************************************************************************************
4964 
4965  //**SMP subtraction assignment to dense vectors*************************************************
4979  template< typename VT1 > // Type of the target dense vector
4980  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4981  smpSubAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4982  {
4984 
4985  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4986 
4987  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4988  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4989 
4990  if( right.rows() == 0UL || right.columns() == 0UL ) {
4991  return;
4992  }
4993 
4994  LT x( left ); // Evaluation of the left-hand side dense vector operand
4995  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4996 
4997  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4998  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4999  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
5000  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
5001 
5002  smpSubAssign( ~lhs, x * A * rhs.scalar_ );
5003  }
5004  //**********************************************************************************************
5005 
5006  //**SMP subtraction assignment to sparse vectors************************************************
5007  // No special implementation for the SMP subtraction assignment to sparse vectors.
5008  //**********************************************************************************************
5009 
5010  //**SMP multiplication assignment to dense vectors**********************************************
5025  template< typename VT1 > // Type of the target dense vector
5026  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5027  smpMultAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
5028  {
5030 
5034 
5035  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5036 
5037  const ResultType tmp( rhs );
5038  smpMultAssign( ~lhs, tmp );
5039  }
5040  //**********************************************************************************************
5041 
5042  //**SMP multiplication assignment to sparse vectors*********************************************
5043  // No special implementation for the SMP multiplication assignment to sparse vectors.
5044  //**********************************************************************************************
5045 
5046  //**Compile time checks*************************************************************************
// Compile-time check on the scalar type: presumably fails the build unless ST and
// RightOperand are the same type - confirm against the macro definition.
5054  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
5055  //**********************************************************************************************
5056 };
5058 //*************************************************************************************************
5059 
5060 
5061 
5062 
5063 //=================================================================================================
5064 //
5065 // GLOBAL BINARY ARITHMETIC OPERATORS
5066 //
5067 //=================================================================================================
5068 
5069 //*************************************************************************************************
5100 template< typename T1 // Type of the left-hand side dense vector
5101  , typename T2 > // Type of the right-hand side dense matrix
5102 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
5104 {
5106 
5107  if( (~vec).size() != (~mat).rows() )
5108  throw std::invalid_argument( "Vector and matrix sizes do not match" );
5109 
5110  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
5111 }
5112 //*************************************************************************************************
5113 
5114 
5115 
5116 
5117 //=================================================================================================
5118 //
5119 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
5120 //
5121 //=================================================================================================
5122 
5123 //*************************************************************************************************
5136 template< typename T1 // Type of the left-hand side dense vector
5137  , typename T2 // Type of the right-hand side dense matrix
5138  , bool SO > // Storage order of the right-hand side dense matrix
5139 inline const typename EnableIf< IsMatMatMultExpr<T2>, typename MultExprTrait<T1,T2>::Type >::Type
5141 {
5143 
5145 
5146  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
5147 }
5148 //*************************************************************************************************
5149 
5150 
5151 
5152 
5153 //=================================================================================================
5154 //
5155 // SIZE SPECIALIZATIONS
5156 //
5157 //=================================================================================================
5158 
5159 //*************************************************************************************************
5161 template< typename MT, typename VT >
5162 struct Size< TDVecDMatMultExpr<MT,VT> >
5163  : public Columns<MT>
5164 {};
5166 //*************************************************************************************************
5167 
5168 
5169 
5170 
5171 //=================================================================================================
5172 //
5173 // EXPRESSION TRAIT SPECIALIZATIONS
5174 //
5175 //=================================================================================================
5176 
5177 //*************************************************************************************************
5179 template< typename VT, typename MT, bool AF >
5180 struct SubvectorExprTrait< TDVecDMatMultExpr<VT,MT>, AF >
5181 {
5182  public:
5183  //**********************************************************************************************
5184  typedef typename MultExprTrait< typename SubvectorExprTrait<const VT,AF>::Type
5185  , typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
5186  //**********************************************************************************************
5187 };
5189 //*************************************************************************************************
5190 
5191 } // namespace blaze
5192 
5193 #endif
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:120
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:278
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B * C$ ).
Definition: DMatDMatMultExpr.h:8247
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:377
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:123
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:441
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:291
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:367
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:279
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:276
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:274
Constraint on the data type.
Header file for the MultExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:431
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:387
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:125
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:327
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:121
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:313
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:288
Header file for the IsMatMatMultExpr type trait class.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:285
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold.This threshold specifies when a dens...
Definition: Thresholds.h:368
Header file for the IsBlasCompatible type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:275
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:411
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:277
Constraints on the storage order of matrix types.
Constraint on the data type.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:442
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:421
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:273
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:124
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Constraint on the data type.
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
Constraint on the data type.
Expression object for transpose dense vector-dense matrix multiplications.The TDVecDMatMultExpr class...
Definition: Forward.h:133
Header file for the HasMutableDataAccess type trait.
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:122
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:399
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the IsUpper type trait.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:282
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849