TDVecTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level2.h>
55 #include <blaze/math/Intrinsics.h>
56 #include <blaze/math/shims/Reset.h>
77 #include <blaze/system/BLAS.h>
79 #include <blaze/util/Assert.h>
80 #include <blaze/util/Complex.h>
83 #include <blaze/util/DisableIf.h>
84 #include <blaze/util/EnableIf.h>
86 #include <blaze/util/SelectType.h>
87 #include <blaze/util/Types.h>
93 
94 
95 namespace blaze {
96 
97 //=================================================================================================
98 //
99 // CLASS TDVECTDMATMULTEXPR
100 //
101 //=================================================================================================
102 
103 //*************************************************************************************************
110 template< typename VT // Type of the left-hand side dense vector
111  , typename MT > // Type of the right-hand side dense matrix
112 class TDVecTDMatMultExpr : public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
113  , private TVecMatMultExpr
114  , private Computation
115 {
116  private:
117  //**Type definitions****************************************************************************
// Shorthand aliases for the nested types of the two operand types VT and MT.
// VRT: result type of the left-hand side vector operand.
118  typedef typename VT::ResultType VRT;
// MRT: result type of the right-hand side matrix operand.
119  typedef typename MT::ResultType MRT;
// VET: element type of the vector operand's result type.
120  typedef typename VRT::ElementType VET;
// MET: element type of the matrix operand's result type.
121  typedef typename MRT::ElementType MET;
// VCT: composite type of the vector operand.
122  typedef typename VT::CompositeType VCT;
// MCT: composite type of the matrix operand.
123  typedef typename MT::CompositeType MCT;
124  //**********************************************************************************************
125 
126  //**********************************************************************************************
// Compile-time flag for the vector operand: non-zero when VT is itself a
// computation or when RequiresEvaluation<VT> is set.
128  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
129  //**********************************************************************************************
130 
131  //**********************************************************************************************
// Compile-time flag for the matrix operand MT (counterpart of evaluateVector).
// NOTE(review): the initializer below ends with a dangling '&&' and is never
// closed with '};' in this listing. The remainder of the expression appears to
// be missing and has to be restored from the upstream header before this
// compiles; do not rely on the value until that is confirmed.
133  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
139 
143  template< typename T1 >
144  struct UseSMPAssign {
145  enum { value = ( evaluateVector || evaluateMatrix ) };
146  };
148  //**********************************************************************************************
149 
150  //**********************************************************************************************
152 
156  template< typename T1, typename T2, typename T3 >
157  struct UseSinglePrecisionKernel {
158  enum { value = BLAZE_BLAS_MODE &&
159  HasMutableDataAccess<T1>::value &&
160  HasConstDataAccess<T2>::value &&
161  HasConstDataAccess<T3>::value &&
162  !IsDiagonal<T3>::value &&
163  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
164  IsFloat<typename T1::ElementType>::value &&
165  IsFloat<typename T2::ElementType>::value &&
166  IsFloat<typename T3::ElementType>::value };
167  };
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
177  template< typename T1, typename T2, typename T3 >
178  struct UseDoublePrecisionKernel {
179  enum { value = BLAZE_BLAS_MODE &&
180  HasMutableDataAccess<T1>::value &&
181  HasConstDataAccess<T2>::value &&
182  HasConstDataAccess<T3>::value &&
183  !IsDiagonal<T3>::value &&
184  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
185  IsDouble<typename T1::ElementType>::value &&
186  IsDouble<typename T2::ElementType>::value &&
187  IsDouble<typename T3::ElementType>::value };
188  };
190  //**********************************************************************************************
191 
192  //**********************************************************************************************
194 
198  template< typename T1, typename T2, typename T3 >
199  struct UseSinglePrecisionComplexKernel {
200  typedef complex<float> Type;
201  enum { value = BLAZE_BLAS_MODE &&
202  HasMutableDataAccess<T1>::value &&
203  HasConstDataAccess<T2>::value &&
204  HasConstDataAccess<T3>::value &&
205  !IsDiagonal<T3>::value &&
206  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
207  IsSame<typename T1::ElementType,Type>::value &&
208  IsSame<typename T2::ElementType,Type>::value &&
209  IsSame<typename T3::ElementType,Type>::value };
210  };
212  //**********************************************************************************************
213 
214  //**********************************************************************************************
216 
220  template< typename T1, typename T2, typename T3 >
221  struct UseDoublePrecisionComplexKernel {
222  typedef complex<double> Type;
223  enum { value = BLAZE_BLAS_MODE &&
224  HasMutableDataAccess<T1>::value &&
225  HasConstDataAccess<T2>::value &&
226  HasConstDataAccess<T3>::value &&
227  !IsDiagonal<T3>::value &&
228  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
229  IsSame<typename T1::ElementType,Type>::value &&
230  IsSame<typename T2::ElementType,Type>::value &&
231  IsSame<typename T3::ElementType,Type>::value };
232  };
234  //**********************************************************************************************
235 
236  //**********************************************************************************************
238 
241  template< typename T1, typename T2, typename T3 >
242  struct UseDefaultKernel {
243  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
244  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
245  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
246  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
247  };
249  //**********************************************************************************************
250 
251  //**********************************************************************************************
253 
257  template< typename T1, typename T2, typename T3 >
258  struct UseVectorizedDefaultKernel {
259  enum { value = !IsDiagonal<T3>::value &&
260  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
261  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
262  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
263  IntrinsicTrait<typename T1::ElementType>::addition &&
264  IntrinsicTrait<typename T1::ElementType>::multiplication };
265  };
267  //**********************************************************************************************
268 
269  public:
270  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are used below but their
// declarations are not part of the visible listing (lines appear to be
// missing just above) - confirm they are declared before this point.
// ReturnType: type returned by the subscript operator (a const ElementType).
276  typedef const ElementType ReturnType;
// CompositeType: const ResultType.
277  typedef const ResultType CompositeType;
278 
// LeftOperand: type of the stored vector operand, chosen between 'const VT'
// and 'const VT&' depending on IsExpression<VT>::value (presumably by value
// for expressions and by reference otherwise - verify against SelectType).
280  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
281 
// RightOperand: type of the stored matrix operand, chosen the same way
// between 'const MT' and 'const MT&' depending on IsExpression<MT>::value.
283  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
284 
287 
290  //**********************************************************************************************
291 
292  //**Compilation flags***************************************************************************
294  enum { vectorizable = !IsDiagonal<MT>::value &&
295  VT::vectorizable && MT::vectorizable &&
299 
301  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
302  !evaluateMatrix && MT::smpAssignable };
303  //**********************************************************************************************
304 
305  //**Constructor*********************************************************************************
311  explicit inline TDVecTDMatMultExpr( const VT& vec, const MT& mat )
312  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
313  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
314  {
315  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
316  }
317  //**********************************************************************************************
318 
319  //**Subscript operator**************************************************************************
325  inline ReturnType operator[]( size_t index ) const {
326  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
327 
328  if( ( IsStrictlyLower<MT>::value && index == mat_.columns()-1UL ) ||
329  ( IsStrictlyUpper<MT>::value && index == 0UL ) ||
330  mat_.rows() == 0UL )
331  return ElementType();
332 
334  return vec_[index] * mat_(index,index);
335 
336  const size_t ibegin( ( IsLower<MT>::value )
337  ?( IsStrictlyLower<MT>::value ? index+1UL : index )
338  :( 0UL ) );
339  const size_t iend( ( IsUpper<MT>::value )
340  ?( IsStrictlyUpper<MT>::value ? index : index+1UL )
341  :( mat_.rows() ) );
342  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
343 
344  const size_t inum( iend - ibegin );
345  const size_t ipos( ibegin + ( ( inum - 1UL ) & size_t(-2) ) + 1UL );
346 
347  ElementType res( vec_[ibegin] * mat_(ibegin,index) );
348 
349  for( size_t i=ibegin+1UL; i<ipos; i+=2UL ) {
350  res += vec_[i] * mat_(i,index) + vec_[i+1UL] * mat_(i+1UL,index);
351  }
352  if( ipos < iend ) {
353  res += vec_[ipos] * mat_(ipos,index);
354  }
355 
356  return res;
357  }
358  //**********************************************************************************************
359 
360  //**Size function*******************************************************************************
365  inline size_t size() const {
366  return mat_.columns();
367  }
368  //**********************************************************************************************
369 
370  //**Left operand access*************************************************************************
375  inline LeftOperand leftOperand() const {
376  return vec_;
377  }
378  //**********************************************************************************************
379 
380  //**Right operand access************************************************************************
385  inline RightOperand rightOperand() const {
386  return mat_;
387  }
388  //**********************************************************************************************
389 
390  //**********************************************************************************************
396  template< typename T >
397  inline bool canAlias( const T* alias ) const {
398  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
399  }
400  //**********************************************************************************************
401 
402  //**********************************************************************************************
408  template< typename T >
409  inline bool isAliased( const T* alias ) const {
410  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
411  }
412  //**********************************************************************************************
413 
414  //**********************************************************************************************
419  inline bool isAligned() const {
420  return vec_.isAligned() && mat_.isAligned();
421  }
422  //**********************************************************************************************
423 
424  //**********************************************************************************************
429  inline bool canSMPAssign() const {
430  return ( !BLAZE_BLAS_IS_PARALLEL ||
431  ( IsComputation<MT>::value && !evaluateMatrix ) ||
432  ( mat_.rows() * mat_.columns() < TDVECTDMATMULT_THRESHOLD ) ) &&
434  }
435  //**********************************************************************************************
436 
437  private:
438  //**Member variables****************************************************************************
// Left-hand side dense vector of the multiplication expression
// (initialized from the constructor argument 'vec').
439  LeftOperand vec_;
// Right-hand side dense matrix of the multiplication expression
// (initialized from the constructor argument 'mat').
440  RightOperand mat_;
441  //**********************************************************************************************
442 
443  //**Assignment to dense vectors*****************************************************************
456  template< typename VT1 > // Type of the target dense vector
457  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
458  {
460 
461  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
462 
463  if( rhs.mat_.rows() == 0UL ) {
464  reset( ~lhs );
465  return;
466  }
467  else if( rhs.mat_.columns() == 0UL ) {
468  return;
469  }
470 
471  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
472  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
473 
474  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
475  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
476  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
477  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
478 
479  TDVecTDMatMultExpr::selectAssignKernel( ~lhs, x, A );
480  }
482  //**********************************************************************************************
483 
484  //**Assignment to dense vectors (kernel selection)**********************************************
495  template< typename VT1 // Type of the left-hand side target vector
496  , typename VT2 // Type of the left-hand side vector operand
497  , typename MT1 > // Type of the right-hand side matrix operand
498  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
499  {
500  if( ( IsDiagonal<MT1>::value ) ||
501  ( IsComputation<MT>::value && !evaluateMatrix ) ||
502  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
503  selectSmallAssignKernel( y, x, A );
504  else
505  selectBlasAssignKernel( y, x, A );
506  }
508  //**********************************************************************************************
509 
510  //**Default assignment to dense vectors*********************************************************
// Default assignment of a transpose dense vector-transpose dense matrix
// multiplication (y = x * A).
//
// y: the target left-hand side dense vector.
// x: the left-hand side dense vector operand.
// A: the right-hand side dense matrix operand.
//
// The product expression of the two operands is handed to the assign()
// member function of the target vector.
template< typename TargetVec     // Type of the left-hand side target vector
        , typename OperandVec    // Type of the left-hand side vector operand
        , typename OperandMat >  // Type of the right-hand side matrix operand
static inline void selectDefaultAssignKernel( TargetVec& y, const OperandVec& x, const OperandMat& A )
{
   y.assign( x * A );
}
532  //**********************************************************************************************
533 
534  //**Default assignment to dense vectors (small matrices)****************************************
548  template< typename VT1 // Type of the left-hand side target vector
549  , typename VT2 // Type of the left-hand side vector operand
550  , typename MT1 > // Type of the right-hand side matrix operand
551  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
552  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
553  {
554  selectDefaultAssignKernel( y, x, A );
555  }
557  //**********************************************************************************************
558 
559  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized default assignment of a small transpose dense vector-transpose
// dense matrix multiplication (y = x * A).
//
// y: the target left-hand side dense vector (its elements are overwritten).
// x: the left-hand side dense vector operand.
// A: the right-hand side dense matrix operand.
//
// This overload takes part in overload resolution only if
// UseVectorizedDefaultKernel<VT1,VT2,MT1> is set. The columns of A are
// processed in blocks of 8, 4, 3 and 2 columns, followed by a single
// remaining column. For every block the row range [ibegin,iend) is derived
// from the structure traits of MT1, one intrinsic accumulator per column is
// built up over that range, and the horizontal sum of each accumulator is
// stored in the corresponding element of y.
//
// NOTE(review): the accumulators are declared without an initializer, so the
// kernel relies on a default-constructed IntrinsicType being zero - confirm.
// NOTE(review): the row loops advance in steps of IT::size and load a full
// intrinsic vector at every step, so the last load can extend beyond 'iend';
// presumably this relies on padding of the operands - confirm.
// NOTE(review): IntrinsicType and ElementType are not declared in the visible
// part of the listing.
573  template< typename VT1 // Type of the left-hand side target vector
574  , typename VT2 // Type of the left-hand side vector operand
575  , typename MT1 > // Type of the right-hand side matrix operand
576  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
577  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
578  {
579  typedef IntrinsicTrait<ElementType> IT;
580 
581  const size_t M( A.rows() );
582  const size_t N( A.columns() );
583 
584  size_t j( 0UL );
585 
// Blocks of eight columns [j,j+8)
586  for( ; (j+8UL) <= N; j+=8UL )
587  {
// Lower matrices: start at the first row that can be non-zero in column j,
// masked with size_t(-IT::size) (rounds down to a multiple of IT::size,
// assuming IT::size is a power of two). Otherwise start at row 0.
588  const size_t ibegin( ( IsLower<MT1>::value )
589  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
590  :( 0UL ) );
// Upper matrices: stop after the last row that can be non-zero in the last
// column of the block. Otherwise run over all M rows.
591  const size_t iend( ( IsUpper<MT1>::value )
592  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
593  :( M ) );
594  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
595 
596  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
597 
// One load of x per step is shared by all eight column accumulators
598  for( size_t i=ibegin; i<iend; i+=IT::size ) {
599  const IntrinsicType x1( x.load(i) );
600  xmm1 = xmm1 + x1 * A.load(i,j );
601  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
602  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
603  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
604  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
605  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
606  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
607  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
608  }
609 
// Horizontal reduction of each accumulator into the target element
610  y[j ] = sum( xmm1 );
611  y[j+1UL] = sum( xmm2 );
612  y[j+2UL] = sum( xmm3 );
613  y[j+3UL] = sum( xmm4 );
614  y[j+4UL] = sum( xmm5 );
615  y[j+5UL] = sum( xmm6 );
616  y[j+6UL] = sum( xmm7 );
617  y[j+7UL] = sum( xmm8 );
618  }
619 
// Blocks of four columns [j,j+4)
620  for( ; (j+4UL) <= N; j+=4UL )
621  {
622  const size_t ibegin( ( IsLower<MT1>::value )
623  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
624  :( 0UL ) );
625  const size_t iend( ( IsUpper<MT1>::value )
626  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
627  :( M ) );
628  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
629 
630  IntrinsicType xmm1, xmm2, xmm3, xmm4;
631 
632  for( size_t i=ibegin; i<iend; i+=IT::size ) {
633  const IntrinsicType x1( x.load(i) );
634  xmm1 = xmm1 + x1 * A.load(i,j );
635  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
636  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
637  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
638  }
639 
640  y[j ] = sum( xmm1 );
641  y[j+1UL] = sum( xmm2 );
642  y[j+2UL] = sum( xmm3 );
643  y[j+3UL] = sum( xmm4 );
644  }
645 
// Blocks of three columns [j,j+3)
646  for( ; (j+3UL) <= N; j+=3UL )
647  {
648  const size_t ibegin( ( IsLower<MT1>::value )
649  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
650  :( 0UL ) );
651  const size_t iend( ( IsUpper<MT1>::value )
652  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
653  :( M ) );
654  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
655 
656  IntrinsicType xmm1, xmm2, xmm3;
657 
658  for( size_t i=ibegin; i<iend; i+=IT::size ) {
659  const IntrinsicType x1( x.load(i) );
660  xmm1 = xmm1 + x1 * A.load(i,j );
661  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
662  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
663  }
664 
665  y[j ] = sum( xmm1 );
666  y[j+1UL] = sum( xmm2 );
667  y[j+2UL] = sum( xmm3 );
668  }
669 
// Blocks of two columns [j,j+2)
670  for( ; (j+2UL) <= N; j+=2UL )
671  {
672  const size_t ibegin( ( IsLower<MT1>::value )
673  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
674  :( 0UL ) );
675  const size_t iend( ( IsUpper<MT1>::value )
676  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
677  :( M ) );
678  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
679 
680  IntrinsicType xmm1, xmm2;
681 
682  for( size_t i=ibegin; i<iend; i+=IT::size ) {
683  const IntrinsicType x1( x.load(i) );
684  xmm1 = xmm1 + x1 * A.load(i,j );
685  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
686  }
687 
688  y[j ] = sum( xmm1 );
689  y[j+1UL] = sum( xmm2 );
690  }
691 
// At most one column is left at this point
692  if( j < N )
693  {
694  const size_t ibegin( ( IsLower<MT1>::value )
695  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
696  :( 0UL ) );
697  const size_t iend( ( IsUpper<MT1>::value )
698  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
699  :( M ) );
700  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
701 
702  IntrinsicType xmm1;
703 
704  for( size_t i=ibegin; i<iend; i+=IT::size ) {
705  xmm1 = xmm1 + x.load(i) * A.load(i,j);
706  }
707 
708  y[j] = sum( xmm1 );
709  }
710  }
712  //**********************************************************************************************
713 
714  //**Default assignment to dense vectors (large matrices)****************************************
728  template< typename VT1 // Type of the left-hand side target vector
729  , typename VT2 // Type of the left-hand side vector operand
730  , typename MT1 > // Type of the right-hand side matrix operand
731  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
732  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
733  {
734  selectDefaultAssignKernel( y, x, A );
735  }
737  //**********************************************************************************************
738 
739  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized default assignment of a large transpose dense vector-transpose
// dense matrix multiplication (y = x * A).
//
// y: the target left-hand side dense vector (reset first, then accumulated).
// x: the left-hand side dense vector operand.
// A: the right-hand side dense matrix operand.
//
// This overload takes part in overload resolution only if
// UseVectorizedDefaultKernel<VT1,VT2,MT1> is set. The target is reset and the
// columns of A are processed in blocks of 8, 4 and 2 columns, followed by a
// single remaining column. Within every block the row range [ibegin,iend),
// derived from the structure traits of MT1, is traversed in steps of four
// intrinsic vectors, then two, then a single remaining one; after every step
// the horizontal sum of the partial products is added to the elements of y.
//
// NOTE(review): a full intrinsic vector is loaded at every step, so the last
// load can extend beyond 'iend'; presumably this relies on padding of the
// operands - confirm.
// NOTE(review): IntrinsicType and ElementType are not declared in the visible
// part of the listing.
753  template< typename VT1 // Type of the left-hand side target vector
754  , typename VT2 // Type of the left-hand side vector operand
755  , typename MT1 > // Type of the right-hand side matrix operand
756  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
757  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
758  {
759  typedef IntrinsicTrait<ElementType> IT;
760 
761  const size_t M( A.rows() );
762  const size_t N( A.columns() );
763 
// All contributions below are added with '+=', so y has to start at zero
764  reset( y );
765 
766  size_t j( 0UL );
767 
// Blocks of eight columns [j,j+8)
768  for( ; (j+8UL) <= N; j+=8UL )
769  {
// Lower matrices: start at the first row that can be non-zero in column j,
// masked with size_t(-IT::size) (rounds down to a multiple of IT::size,
// assuming IT::size is a power of two). Otherwise start at row 0.
770  const size_t ibegin( ( IsLower<MT1>::value )
771  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
772  :( 0UL ) );
// Upper matrices: stop after the last row that can be non-zero in the last
// column of the block. Otherwise run over all M rows.
773  const size_t iend( ( IsUpper<MT1>::value )
774  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
775  :( M ) );
776  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
777 
778  size_t i( ibegin );
779 
// Four intrinsic vectors of rows per step
780  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
781  const size_t i1( i+IT::size );
782  const size_t i2( i+IT::size*2UL );
783  const size_t i3( i+IT::size*3UL );
784  const IntrinsicType x1( x.load(i ) );
785  const IntrinsicType x2( x.load(i1) );
786  const IntrinsicType x3( x.load(i2) );
787  const IntrinsicType x4( x.load(i3) );
788  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
789  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
790  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
791  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
792  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) );
793  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) );
794  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) );
795  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) );
796  }
797 
// Two intrinsic vectors of rows per step
798  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
799  const size_t i1( i+IT::size );
800  const IntrinsicType x1( x.load(i ) );
801  const IntrinsicType x2( x.load(i1) );
802  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
803  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
804  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
805  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
806  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) );
807  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) );
808  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) );
809  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) );
810  }
811 
// At most one intrinsic vector of rows is left after the loop above
812  if( i < iend ) {
813  const IntrinsicType x1( x.load(i) );
814  y[j ] += sum( x1 * A.load(i,j ) );
815  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
816  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
817  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
818  y[j+4UL] += sum( x1 * A.load(i,j+4UL) );
819  y[j+5UL] += sum( x1 * A.load(i,j+5UL) );
820  y[j+6UL] += sum( x1 * A.load(i,j+6UL) );
821  y[j+7UL] += sum( x1 * A.load(i,j+7UL) );
822  }
823  }
824 
// Blocks of four columns [j,j+4)
825  for( ; (j+4UL) <= N; j+=4UL )
826  {
827  const size_t ibegin( ( IsLower<MT1>::value )
828  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
829  :( 0UL ) );
830  const size_t iend( ( IsUpper<MT1>::value )
831  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
832  :( M ) );
833  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
834 
835  size_t i( ibegin );
836 
837  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
838  const size_t i1( i+IT::size );
839  const size_t i2( i+IT::size*2UL );
840  const size_t i3( i+IT::size*3UL );
841  const IntrinsicType x1( x.load(i ) );
842  const IntrinsicType x2( x.load(i1) );
843  const IntrinsicType x3( x.load(i2) );
844  const IntrinsicType x4( x.load(i3) );
845  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
846  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
847  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
848  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
849  }
850 
851  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
852  const size_t i1( i+IT::size );
853  const IntrinsicType x1( x.load(i ) );
854  const IntrinsicType x2( x.load(i1) );
855  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
856  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
857  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
858  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
859  }
860 
861  if( i < iend ) {
862  const IntrinsicType x1( x.load(i) );
863  y[j ] += sum( x1 * A.load(i,j ) );
864  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
865  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
866  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
867  }
868  }
869 
// Blocks of two columns [j,j+2)
870  for( ; (j+2UL) <= N; j+=2UL )
871  {
872  const size_t ibegin( ( IsLower<MT1>::value )
873  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
874  :( 0UL ) );
875  const size_t iend( ( IsUpper<MT1>::value )
876  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
877  :( M ) );
878  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
879 
880  size_t i( ibegin );
881 
882  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
883  const size_t i1( i+IT::size );
884  const size_t i2( i+IT::size*2UL );
885  const size_t i3( i+IT::size*3UL );
886  const IntrinsicType x1( x.load(i ) );
887  const IntrinsicType x2( x.load(i1) );
888  const IntrinsicType x3( x.load(i2) );
889  const IntrinsicType x4( x.load(i3) );
890  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
891  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
892  }
893 
894  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
895  const size_t i1( i+IT::size );
896  const IntrinsicType x1( x.load(i ) );
897  const IntrinsicType x2( x.load(i1) );
898  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
899  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
900  }
901 
902  if( i < iend ) {
903  const IntrinsicType x1( x.load(i) );
904  y[j ] += sum( x1 * A.load(i,j ) );
905  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
906  }
907  }
908 
// At most one column is left at this point
909  if( j < N )
910  {
911  const size_t ibegin( ( IsLower<MT1>::value )
912  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
913  :( 0UL ) );
914  const size_t iend( ( IsUpper<MT1>::value )
915  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
916  :( M ) );
917  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
918 
919  size_t i( ibegin );
920 
921  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
922  const size_t i1( i+IT::size );
923  const size_t i2( i+IT::size*2UL );
924  const size_t i3( i+IT::size*3UL );
925  const IntrinsicType x1( x.load(i ) );
926  const IntrinsicType x2( x.load(i1) );
927  const IntrinsicType x3( x.load(i2) );
928  const IntrinsicType x4( x.load(i3) );
929  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) );
930  }
931 
932  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
933  const size_t i1( i+IT::size );
934  const IntrinsicType x1( x.load(i ) );
935  const IntrinsicType x2( x.load(i1) );
936  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) );
937  }
938 
939  if( i < iend ) {
940  const IntrinsicType x1( x.load(i) );
941  y[j] += sum( x1 * A.load(i,j) );
942  }
943  }
944  }
946  //**********************************************************************************************
947 
948  //**BLAS-based assignment to dense vectors (default)********************************************
962  template< typename VT1 // Type of the left-hand side target vector
963  , typename VT2 // Type of the left-hand side vector operand
964  , typename MT1 > // Type of the right-hand side matrix operand
965  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
966  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
967  {
968  selectLargeAssignKernel( y, x, A );
969  }
971  //**********************************************************************************************
972 
973  //**BLAS-based assignment to dense vectors (single precision)***********************************
974 #if BLAZE_BLAS_MODE
975 
988  template< typename VT1 // Type of the left-hand side target vector
989  , typename VT2 // Type of the left-hand side vector operand
990  , typename MT1 > // Type of the right-hand side matrix operand
991  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
992  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
993  {
994  if( IsTriangular<MT1>::value ) {
995  assign( y, x );
996  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
997  }
998  else {
999  sgemv( y, x, A, 1.0F, 0.0F );
1000  }
1001  }
1003 #endif
1004  //**********************************************************************************************
1005 
1006  //**BLAS-based assignment to dense vectors (double precision)***********************************
1007 #if BLAZE_BLAS_MODE
1008 
1021  template< typename VT1 // Type of the left-hand side target vector
1022  , typename VT2 // Type of the left-hand side vector operand
1023  , typename MT1 > // Type of the right-hand side matrix operand
1024  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1025  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1026  {
1027  if( IsTriangular<MT1>::value ) {
1028  assign( y, x );
1029  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1030  }
1031  else {
1032  dgemv( y, x, A, 1.0, 0.0 );
1033  }
1034  }
1036 #endif
1037  //**********************************************************************************************
1038 
1039  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1040 #if BLAZE_BLAS_MODE
1041 
1054  template< typename VT1 // Type of the left-hand side target vector
1055  , typename VT2 // Type of the left-hand side vector operand
1056  , typename MT1 > // Type of the right-hand side matrix operand
1057  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1058  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1059  {
1060  if( IsTriangular<MT1>::value ) {
1061  assign( y, x );
1062  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1063  }
1064  else {
1065  cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1066  }
1067  }
1069 #endif
1070  //**********************************************************************************************
1071 
1072  //**BLAS-based assignment to dense vectors (double precision complex)***************************
1073 #if BLAZE_BLAS_MODE
1074 
1087  template< typename VT1 // Type of the left-hand side target vector
1088  , typename VT2 // Type of the left-hand side vector operand
1089  , typename MT1 > // Type of the right-hand side matrix operand
1090  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1091  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
1092  {
1093  if( IsTriangular<MT1>::value ) {
1094  assign( y, x );
1095  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1096  }
1097  else {
1098  zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1099  }
1100  }
1102 #endif
1103  //**********************************************************************************************
1104 
1105  //**Assignment to sparse vectors****************************************************************
1118  template< typename VT1 > // Type of the target sparse vector
1119  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1120  {
1122 
1126 
1127  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1128 
1129  const ResultType tmp( serial( rhs ) );
1130  assign( ~lhs, tmp );
1131  }
1133  //**********************************************************************************************
1134 
1135  //**Addition assignment to dense vectors********************************************************
1148  template< typename VT1 > // Type of the target dense vector
1149  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1150  {
1152 
1153  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1154 
1155  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1156  return;
1157  }
1158 
1159  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1160  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1161 
1162  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1163  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1164  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1165  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1166 
1167  TDVecTDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
1168  }
1170  //**********************************************************************************************
1171 
1172  //**Addition assignment to dense vectors (kernel selection)*************************************
1183  template< typename VT1 // Type of the left-hand side target vector
1184  , typename VT2 // Type of the left-hand side vector operand
1185  , typename MT1 > // Type of the right-hand side matrix operand
1186  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1187  {
1188  if( ( IsDiagonal<MT1>::value ) ||
1189  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1190  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1191  selectSmallAddAssignKernel( y, x, A );
1192  else
1193  selectBlasAddAssignKernel( y, x, A );
1194  }
1196  //**********************************************************************************************
1197 
1198  //**Default addition assignment to dense vectors************************************************
/*!\brief Default (non-vectorized) addition assignment kernel.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Builds the product expression \c x*A and passes it to the \c addAssign() member function of
// the target vector.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1 >  // Type of the right-hand side matrix operand
static inline void
   selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
{
   y.addAssign( x * A );
}
1220  //**********************************************************************************************
1221 
1222  //**Default addition assignment to dense vectors (small matrices)*******************************
1236  template< typename VT1 // Type of the left-hand side target vector
1237  , typename VT2 // Type of the left-hand side vector operand
1238  , typename MT1 > // Type of the right-hand side matrix operand
1239  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1240  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1241  {
1242  selectDefaultAddAssignKernel( y, x, A );
1243  }
1245  //**********************************************************************************************
1246 
1247  //**Vectorized default addition assignment to dense vectors (small matrices)********************
1262  template< typename VT1 // Type of the left-hand side target vector
1263  , typename VT2 // Type of the left-hand side vector operand
1264  , typename MT1 > // Type of the right-hand side matrix operand
1265  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1266  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1267  {
1268  typedef IntrinsicTrait<ElementType> IT;
1269 
1270  const size_t M( A.rows() );
1271  const size_t N( A.columns() );
1272 
1273  size_t j( 0UL );
1274 
1275  for( ; (j+8UL) <= N; j+=8UL )
1276  {
1277  const size_t ibegin( ( IsLower<MT1>::value )
1278  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1279  :( 0UL ) );
1280  const size_t iend( ( IsUpper<MT1>::value )
1281  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
1282  :( M ) );
1283  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1284 
1285  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1286 
1287  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1288  const IntrinsicType x1( x.load(i) );
1289  xmm1 = xmm1 + x1 * A.load(i,j );
1290  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1291  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1292  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1293  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1294  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1295  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1296  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1297  }
1298 
1299  y[j ] += sum( xmm1 );
1300  y[j+1UL] += sum( xmm2 );
1301  y[j+2UL] += sum( xmm3 );
1302  y[j+3UL] += sum( xmm4 );
1303  y[j+4UL] += sum( xmm5 );
1304  y[j+5UL] += sum( xmm6 );
1305  y[j+6UL] += sum( xmm7 );
1306  y[j+7UL] += sum( xmm8 );
1307  }
1308 
1309  for( ; (j+4UL) <= N; j+=4UL )
1310  {
1311  const size_t ibegin( ( IsLower<MT1>::value )
1312  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1313  :( 0UL ) );
1314  const size_t iend( ( IsUpper<MT1>::value )
1315  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
1316  :( M ) );
1317  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1318 
1319  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1320 
1321  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1322  const IntrinsicType x1( x.load(i) );
1323  xmm1 = xmm1 + x1 * A.load(i,j );
1324  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1325  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1326  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1327  }
1328 
1329  y[j ] += sum( xmm1 );
1330  y[j+1UL] += sum( xmm2 );
1331  y[j+2UL] += sum( xmm3 );
1332  y[j+3UL] += sum( xmm4 );
1333  }
1334 
1335  for( ; (j+3UL) <= N; j+=3UL )
1336  {
1337  const size_t ibegin( ( IsLower<MT1>::value )
1338  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1339  :( 0UL ) );
1340  const size_t iend( ( IsUpper<MT1>::value )
1341  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
1342  :( M ) );
1343  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1344 
1345  IntrinsicType xmm1, xmm2, xmm3;
1346 
1347  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1348  const IntrinsicType x1( x.load(i) );
1349  xmm1 = xmm1 + x1 * A.load(i,j );
1350  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1351  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1352  }
1353 
1354  y[j ] += sum( xmm1 );
1355  y[j+1UL] += sum( xmm2 );
1356  y[j+2UL] += sum( xmm3 );
1357  }
1358 
1359  for( ; (j+2UL) <= N; j+=2UL )
1360  {
1361  const size_t ibegin( ( IsLower<MT1>::value )
1362  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1363  :( 0UL ) );
1364  const size_t iend( ( IsUpper<MT1>::value )
1365  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
1366  :( M ) );
1367  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1368 
1369  IntrinsicType xmm1, xmm2;
1370 
1371  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1372  const IntrinsicType x1( x.load(i) );
1373  xmm1 = xmm1 + x1 * A.load(i,j );
1374  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1375  }
1376 
1377  y[j ] += sum( xmm1 );
1378  y[j+1UL] += sum( xmm2 );
1379  }
1380 
1381  if( j < N )
1382  {
1383  const size_t ibegin( ( IsLower<MT1>::value )
1384  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1385  :( 0UL ) );
1386  const size_t iend( ( IsUpper<MT1>::value )
1387  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1388  :( M ) );
1389  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1390 
1391  IntrinsicType xmm1;
1392 
1393  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1394  xmm1 = xmm1 + A.load(i,j) * x.load(i);
1395  }
1396 
1397  y[j] += sum( xmm1 );
1398  }
1399  }
1401  //**********************************************************************************************
1402 
1403  //**Default addition assignment to dense vectors (large matrices)*******************************
1417  template< typename VT1 // Type of the left-hand side target vector
1418  , typename VT2 // Type of the left-hand side vector operand
1419  , typename MT1 > // Type of the right-hand side matrix operand
1420  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1421  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1422  {
1423  selectDefaultAddAssignKernel( y, x, A );
1424  }
1426  //**********************************************************************************************
1427 
1428  //**Vectorized default addition assignment to dense vectors (large matrices)********************
1443  template< typename VT1 // Type of the left-hand side target vector
1444  , typename VT2 // Type of the left-hand side vector operand
1445  , typename MT1 > // Type of the right-hand side matrix operand
1446  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1447  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1448  {
1449  typedef IntrinsicTrait<ElementType> IT;
1450 
1451  const size_t M( A.rows() );
1452  const size_t N( A.columns() );
1453 
1454  size_t j( 0UL );
1455 
1456  for( ; (j+8UL) <= N; j+=8UL )
1457  {
1458  const size_t ibegin( ( IsLower<MT1>::value )
1459  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1460  :( 0UL ) );
1461  const size_t iend( ( IsUpper<MT1>::value )
1462  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
1463  :( M ) );
1464  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1465 
1466  size_t i( ibegin );
1467 
1468  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
1469  const size_t i1( i+IT::size );
1470  const size_t i2( i+IT::size*2UL );
1471  const size_t i3( i+IT::size*3UL );
1472  const IntrinsicType x1( x.load(i ) );
1473  const IntrinsicType x2( x.load(i1) );
1474  const IntrinsicType x3( x.load(i2) );
1475  const IntrinsicType x4( x.load(i3) );
1476  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
1477  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
1478  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
1479  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
1480  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) );
1481  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) );
1482  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) );
1483  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) );
1484  }
1485 
1486  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
1487  const size_t i1( i+IT::size );
1488  const IntrinsicType x1( x.load(i ) );
1489  const IntrinsicType x2( x.load(i1) );
1490  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
1491  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
1492  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
1493  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
1494  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) );
1495  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) );
1496  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) );
1497  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) );
1498  }
1499 
1500  if( i < iend ) {
1501  const IntrinsicType x1( x.load(i) );
1502  y[j ] += sum( x1 * A.load(i,j ) );
1503  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
1504  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
1505  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
1506  y[j+4UL] += sum( x1 * A.load(i,j+4UL) );
1507  y[j+5UL] += sum( x1 * A.load(i,j+5UL) );
1508  y[j+6UL] += sum( x1 * A.load(i,j+6UL) );
1509  y[j+7UL] += sum( x1 * A.load(i,j+7UL) );
1510  }
1511  }
1512 
1513  for( ; (j+4UL) <= N; j+=4UL )
1514  {
1515  const size_t ibegin( ( IsLower<MT1>::value )
1516  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1517  :( 0UL ) );
1518  const size_t iend( ( IsUpper<MT1>::value )
1519  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
1520  :( M ) );
1521  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1522 
1523  size_t i( ibegin );
1524 
1525  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
1526  const size_t i1( i+IT::size );
1527  const size_t i2( i+IT::size*2UL );
1528  const size_t i3( i+IT::size*3UL );
1529  const IntrinsicType x1( x.load(i ) );
1530  const IntrinsicType x2( x.load(i1) );
1531  const IntrinsicType x3( x.load(i2) );
1532  const IntrinsicType x4( x.load(i3) );
1533  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
1534  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
1535  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
1536  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
1537  }
1538 
1539  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
1540  const size_t i1( i+IT::size );
1541  const IntrinsicType x1( x.load(i ) );
1542  const IntrinsicType x2( x.load(i1) );
1543  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
1544  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
1545  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
1546  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
1547  }
1548 
1549  if( i < iend ) {
1550  const IntrinsicType x1( x.load(i) );
1551  y[j ] += sum( x1 * A.load(i,j ) );
1552  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
1553  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
1554  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
1555  }
1556  }
1557 
1558  for( ; (j+2UL) <= N; j+=2UL )
1559  {
1560  const size_t ibegin( ( IsLower<MT1>::value )
1561  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1562  :( 0UL ) );
1563  const size_t iend( ( IsUpper<MT1>::value )
1564  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
1565  :( M ) );
1566  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1567 
1568  size_t i( ibegin );
1569 
1570  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
1571  const size_t i1( i+IT::size );
1572  const size_t i2( i+IT::size*2UL );
1573  const size_t i3( i+IT::size*3UL );
1574  const IntrinsicType x1( x.load(i ) );
1575  const IntrinsicType x2( x.load(i1) );
1576  const IntrinsicType x3( x.load(i2) );
1577  const IntrinsicType x4( x.load(i3) );
1578  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
1579  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
1580  }
1581 
1582  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
1583  const size_t i1( i+IT::size );
1584  const IntrinsicType x1( x.load(i ) );
1585  const IntrinsicType x2( x.load(i1) );
1586  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
1587  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
1588  }
1589 
1590  if( i < iend ) {
1591  const IntrinsicType x1( x.load(i) );
1592  y[j ] += sum( x1 * A.load(i,j ) );
1593  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
1594  }
1595  }
1596 
1597  if( j < N )
1598  {
1599  const size_t ibegin( ( IsLower<MT1>::value )
1600  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1601  :( 0UL ) );
1602  const size_t iend( ( IsUpper<MT1>::value )
1603  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1604  :( M ) );
1605  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1606 
1607  size_t i( ibegin );
1608 
1609  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
1610  const size_t i1( i+IT::size );
1611  const size_t i2( i+IT::size*2UL );
1612  const size_t i3( i+IT::size*3UL );
1613  const IntrinsicType x1( x.load(i ) );
1614  const IntrinsicType x2( x.load(i1) );
1615  const IntrinsicType x3( x.load(i2) );
1616  const IntrinsicType x4( x.load(i3) );
1617  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) );
1618  }
1619 
1620  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
1621  const size_t i1( i+IT::size );
1622  const IntrinsicType x1( x.load(i ) );
1623  const IntrinsicType x2( x.load(i1) );
1624  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) );
1625  }
1626 
1627  if( i < iend ) {
1628  const IntrinsicType x1( x.load(i) );
1629  y[j] += sum( x1 * A.load(i,j) );
1630  }
1631  }
1632  }
1634  //**********************************************************************************************
1635 
1636  //**BLAS-based addition assignment to dense vectors (default)***********************************
1650  template< typename VT1 // Type of the left-hand side target vector
1651  , typename VT2 // Type of the left-hand side vector operand
1652  , typename MT1 > // Type of the right-hand side matrix operand
1653  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1654  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1655  {
1656  selectLargeAddAssignKernel( y, x, A );
1657  }
1659  //**********************************************************************************************
1660 
1661  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1662 #if BLAZE_BLAS_MODE
1663 
1676  template< typename VT1 // Type of the left-hand side target vector
1677  , typename VT2 // Type of the left-hand side vector operand
1678  , typename MT1 > // Type of the right-hand side matrix operand
1679  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1680  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1681  {
1682  if( IsTriangular<MT1>::value ) {
1683  typename VT1::ResultType tmp( x );
1684  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1685  addAssign( y, tmp );
1686  }
1687  else {
1688  sgemv( y, x, A, 1.0F, 1.0F );
1689  }
1690  }
1692 #endif
1693  //**********************************************************************************************
1694 
1695  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1696 #if BLAZE_BLAS_MODE
1697 
1710  template< typename VT1 // Type of the left-hand side target vector
1711  , typename VT2 // Type of the left-hand side vector operand
1712  , typename MT1 > // Type of the right-hand side matrix operand
1713  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1714  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1715  {
1716  if( IsTriangular<MT1>::value ) {
1717  typename VT1::ResultType tmp( x );
1718  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1719  addAssign( y, tmp );
1720  }
1721  else {
1722  dgemv( y, x, A, 1.0, 1.0 );
1723  }
1724  }
1726 #endif
1727  //**********************************************************************************************
1728 
1729  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1730 #if BLAZE_BLAS_MODE
1731 
1744  template< typename VT1 // Type of the left-hand side target vector
1745  , typename VT2 // Type of the left-hand side vector operand
1746  , typename MT1 > // Type of the right-hand side matrix operand
1747  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1748  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1749  {
1750  if( IsTriangular<MT1>::value ) {
1751  typename VT1::ResultType tmp( x );
1752  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1753  addAssign( y, tmp );
1754  }
1755  else {
1756  cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1757  }
1758  }
1760 #endif
1761  //**********************************************************************************************
1762 
1763  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1764 #if BLAZE_BLAS_MODE
1765 
1778  template< typename VT1 // Type of the left-hand side target vector
1779  , typename VT2 // Type of the left-hand side vector operand
1780  , typename MT1 > // Type of the right-hand side matrix operand
1781  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1782  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1783  {
1784  if( IsTriangular<MT1>::value ) {
1785  typename VT1::ResultType tmp( x );
1786  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1787  addAssign( y, tmp );
1788  }
1789  else {
1790  zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1791  }
1792  }
1794 #endif
1795  //**********************************************************************************************
1796 
1797  //**Addition assignment to sparse vectors*******************************************************
1798  // No special implementation for the addition assignment to sparse vectors.
1799  //**********************************************************************************************
1800 
1801  //**Subtraction assignment to dense vectors*****************************************************
1814  template< typename VT1 > // Type of the target dense vector
1815  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
1816  {
1818 
1819  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1820 
1821  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1822  return;
1823  }
1824 
1825  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1826  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1827 
1828  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1829  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1830  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1831  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1832 
1833  TDVecTDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1834  }
1836  //**********************************************************************************************
1837 
1838  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1849  template< typename VT1 // Type of the left-hand side target vector
1850  , typename VT2 // Type of the left-hand side vector operand
1851  , typename MT1 > // Type of the right-hand side matrix operand
1852  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1853  {
1854  if( ( IsDiagonal<MT1>::value ) ||
1855  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1856  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
1857  selectSmallSubAssignKernel( y, x, A );
1858  else
1859  selectBlasSubAssignKernel( y, x, A );
1860  }
1862  //**********************************************************************************************
1863 
1864  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default (non-vectorized) subtraction assignment kernel.
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// Builds the product expression \c x*A and passes it to the \c subAssign() member function of
// the target vector.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1 >  // Type of the right-hand side matrix operand
static inline void
   selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
{
   y.subAssign( x * A );
}
1886  //**********************************************************************************************
1887 
1888  //**Default subtraction assignment to dense vectors (small matrices)****************************
1902  template< typename VT1 // Type of the left-hand side target vector
1903  , typename VT2 // Type of the left-hand side vector operand
1904  , typename MT1 > // Type of the right-hand side matrix operand
1905  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1906  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1907  {
1908  selectDefaultSubAssignKernel( y, x, A );
1909  }
1911  //**********************************************************************************************
1912 
1913  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
1928  template< typename VT1 // Type of the left-hand side target vector
1929  , typename VT2 // Type of the left-hand side vector operand
1930  , typename MT1 > // Type of the right-hand side matrix operand
1931  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1932  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1933  {
1934  typedef IntrinsicTrait<ElementType> IT;
1935 
1936  const size_t M( A.rows() );
1937  const size_t N( A.columns() );
1938 
1939  size_t j( 0UL );
1940 
1941  for( ; (j+8UL) <= N; j+=8UL )
1942  {
1943  const size_t ibegin( ( IsLower<MT1>::value )
1944  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1945  :( 0UL ) );
1946  const size_t iend( ( IsUpper<MT1>::value )
1947  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
1948  :( M ) );
1949  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1950 
1951  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1952 
1953  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1954  const IntrinsicType x1( x.load(i) );
1955  xmm1 = xmm1 + x1 * A.load(i,j );
1956  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1957  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1958  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1959  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1960  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1961  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1962  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1963  }
1964 
1965  y[j ] -= sum( xmm1 );
1966  y[j+1UL] -= sum( xmm2 );
1967  y[j+2UL] -= sum( xmm3 );
1968  y[j+3UL] -= sum( xmm4 );
1969  y[j+4UL] -= sum( xmm5 );
1970  y[j+5UL] -= sum( xmm6 );
1971  y[j+6UL] -= sum( xmm7 );
1972  y[j+7UL] -= sum( xmm8 );
1973  }
1974 
1975  for( ; (j+4UL) <= N; j+=4UL )
1976  {
1977  const size_t ibegin( ( IsLower<MT1>::value )
1978  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
1979  :( 0UL ) );
1980  const size_t iend( ( IsUpper<MT1>::value )
1981  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
1982  :( M ) );
1983  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1984 
1985  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1986 
1987  for( size_t i=ibegin; i<iend; i+=IT::size ) {
1988  const IntrinsicType x1( x.load(i) );
1989  xmm1 = xmm1 + x1 * A.load(i,j );
1990  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1991  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1992  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1993  }
1994 
1995  y[j ] -= sum( xmm1 );
1996  y[j+1UL] -= sum( xmm2 );
1997  y[j+2UL] -= sum( xmm3 );
1998  y[j+3UL] -= sum( xmm4 );
1999  }
2000 
2001  for( ; (j+3UL) <= N; j+=3UL )
2002  {
2003  const size_t ibegin( ( IsLower<MT1>::value )
2004  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2005  :( 0UL ) );
2006  const size_t iend( ( IsUpper<MT1>::value )
2007  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
2008  :( M ) );
2009  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2010 
2011  IntrinsicType xmm1, xmm2, xmm3;
2012 
2013  for( size_t i=ibegin; i<iend; i+=IT::size ) {
2014  const IntrinsicType x1( x.load(i) );
2015  xmm1 = xmm1 + x1 * A.load(i,j );
2016  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2017  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2018  }
2019 
2020  y[j ] -= sum( xmm1 );
2021  y[j+1UL] -= sum( xmm2 );
2022  y[j+2UL] -= sum( xmm3 );
2023  }
2024 
2025  for( ; (j+2UL) <= N; j+=2UL )
2026  {
2027  const size_t ibegin( ( IsLower<MT1>::value )
2028  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2029  :( 0UL ) );
2030  const size_t iend( ( IsUpper<MT1>::value )
2031  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
2032  :( M ) );
2033  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2034 
2035  IntrinsicType xmm1, xmm2;
2036 
2037  for( size_t i=ibegin; i<iend; i+=IT::size ) {
2038  const IntrinsicType x1( x.load(i) );
2039  xmm1 = xmm1 + x1 * A.load(i,j );
2040  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2041  }
2042 
2043  y[j ] -= sum( xmm1 );
2044  y[j+1UL] -= sum( xmm2 );
2045  }
2046 
2047  if( j < N )
2048  {
2049  const size_t ibegin( ( IsLower<MT1>::value )
2050  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2051  :( 0UL ) );
2052  const size_t iend( ( IsUpper<MT1>::value )
2053  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
2054  :( M ) );
2055  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2056 
2057  IntrinsicType xmm1;
2058 
2059  for( size_t i=ibegin; i<iend; i+=IT::size ) {
2060  xmm1 = xmm1 + A.load(i,j) * x.load(i);
2061  }
2062 
2063  y[j] -= sum( xmm1 );
2064  }
2065  }
2067  //**********************************************************************************************
2068 
2069  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Non-vectorized fallback of the large-matrix subtraction assignment kernel.
//
// Selected through DisableIf, i.e. whenever UseVectorizedDefaultKernel<VT1,VT2,MT1> does
// not hold. The function does no work of its own: it forwards all three arguments
// unchanged to selectDefaultSubAssignKernel().
//
// y: target vector (passed by non-const reference).
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2083  template< typename VT1 // Type of the left-hand side target vector
2084  , typename VT2 // Type of the left-hand side vector operand
2085  , typename MT1 > // Type of the right-hand side matrix operand
2086  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2087  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2088  {
2089  selectDefaultSubAssignKernel( y, x, A );
2090  }
2092  //**********************************************************************************************
2093 
2094  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized large-matrix kernel for the subtraction assignment y -= x * A.
//
// Selected through EnableIf when UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
//
// Columns of A are processed in groups of 8, then 4, then 2, and finally a single
// remaining column. Inside each group the rows are traversed in steps of 4, 2 and 1
// intrinsic vectors (IT::size elements each). Every step multiplies chunks of x with the
// matching chunks of the columns of A, reduces the products with sum() and subtracts the
// scalar result from the corresponding element of y.
//
// For lower/upper matrix types (IsLower/IsUpper and their strict variants) the row range
// [ibegin,iend) is narrowed per column group; otherwise all M rows are visited.
//
// NOTE(review): the loop conditions only compare the first element of a chunk against
// iend, so a load can extend past iend. Presumably the operands are padded to a multiple
// of IT::size - confirm against the storage classes.
//
// y: target vector, updated element-wise via operator-=.
// x: left-hand side vector operand, read via load().
// A: right-hand side matrix operand, read via load(i,j).
2109  template< typename VT1 // Type of the left-hand side target vector
2110  , typename VT2 // Type of the left-hand side vector operand
2111  , typename MT1 > // Type of the right-hand side matrix operand
2112  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2113  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2114  {
2115  typedef IntrinsicTrait<ElementType> IT;
2116 
2117  const size_t M( A.rows() );
2118  const size_t N( A.columns() );
2119 
2120  size_t j( 0UL );
2121 
// Column groups of eight: updates y[j] ... y[j+7] per iteration.
2122  for( ; (j+8UL) <= N; j+=8UL )
2123  {
// First row to visit: for lower matrices start at j (j+1 if strictly lower), masked with
// size_t(-IT::size). The mask rounds down to a multiple of IT::size, assuming IT::size is
// a power of two - TODO confirm.
2124  const size_t ibegin( ( IsLower<MT1>::value )
2125  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2126  :( 0UL ) );
// One-past-last row to visit: for upper matrices bounded by the last column of the group
// (one less if strictly upper), otherwise all M rows.
2127  const size_t iend( ( IsUpper<MT1>::value )
2128  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
2129  :( M ) );
2130  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2131 
2132  size_t i( ibegin );
2133 
// Rows in steps of four intrinsic vectors.
2134  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
2135  const size_t i1( i+IT::size );
2136  const size_t i2( i+IT::size*2UL );
2137  const size_t i3( i+IT::size*3UL );
2138  const IntrinsicType x1( x.load(i ) );
2139  const IntrinsicType x2( x.load(i1) );
2140  const IntrinsicType x3( x.load(i2) );
2141  const IntrinsicType x4( x.load(i3) );
2142  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
2143  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
2144  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
2145  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
2146  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) );
2147  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) );
2148  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) );
2149  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) );
2150  }
2151 
// Remaining rows in steps of two intrinsic vectors.
2152  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
2153  const size_t i1( i+IT::size );
2154  const IntrinsicType x1( x.load(i ) );
2155  const IntrinsicType x2( x.load(i1) );
2156  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
2157  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
2158  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
2159  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
2160  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) );
2161  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) );
2162  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) );
2163  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) );
2164  }
2165 
// At most one intrinsic vector of rows is left at this point.
2166  if( i < iend ) {
2167  const IntrinsicType x1( x.load(i) );
2168  y[j ] -= sum( x1 * A.load(i,j ) );
2169  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) );
2170  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) );
2171  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) );
2172  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) );
2173  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) );
2174  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) );
2175  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) );
2176  }
2177  }
2178 
// Column groups of four: same scheme as above for y[j] ... y[j+3].
2179  for( ; (j+4UL) <= N; j+=4UL )
2180  {
2181  const size_t ibegin( ( IsLower<MT1>::value )
2182  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2183  :( 0UL ) );
2184  const size_t iend( ( IsUpper<MT1>::value )
2185  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
2186  :( M ) );
2187  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2188 
2189  size_t i( ibegin );
2190 
2191  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
2192  const size_t i1( i+IT::size );
2193  const size_t i2( i+IT::size*2UL );
2194  const size_t i3( i+IT::size*3UL );
2195  const IntrinsicType x1( x.load(i ) );
2196  const IntrinsicType x2( x.load(i1) );
2197  const IntrinsicType x3( x.load(i2) );
2198  const IntrinsicType x4( x.load(i3) );
2199  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
2200  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
2201  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
2202  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
2203  }
2204 
2205  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
2206  const size_t i1( i+IT::size );
2207  const IntrinsicType x1( x.load(i ) );
2208  const IntrinsicType x2( x.load(i1) );
2209  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
2210  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
2211  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
2212  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
2213  }
2214 
2215  if( i < iend ) {
2216  const IntrinsicType x1( x.load(i) );
2217  y[j ] -= sum( x1 * A.load(i,j ) );
2218  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) );
2219  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) );
2220  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) );
2221  }
2222  }
2223 
// Column groups of two: same scheme for y[j] and y[j+1].
2224  for( ; (j+2UL) <= N; j+=2UL )
2225  {
2226  const size_t ibegin( ( IsLower<MT1>::value )
2227  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2228  :( 0UL ) );
2229  const size_t iend( ( IsUpper<MT1>::value )
2230  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
2231  :( M ) );
2232  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2233 
2234  size_t i( ibegin );
2235 
2236  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
2237  const size_t i1( i+IT::size );
2238  const size_t i2( i+IT::size*2UL );
2239  const size_t i3( i+IT::size*3UL );
2240  const IntrinsicType x1( x.load(i ) );
2241  const IntrinsicType x2( x.load(i1) );
2242  const IntrinsicType x3( x.load(i2) );
2243  const IntrinsicType x4( x.load(i3) );
2244  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
2245  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
2246  }
2247 
2248  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
2249  const size_t i1( i+IT::size );
2250  const IntrinsicType x1( x.load(i ) );
2251  const IntrinsicType x2( x.load(i1) );
2252  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
2253  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
2254  }
2255 
2256  if( i < iend ) {
2257  const IntrinsicType x1( x.load(i) );
2258  y[j ] -= sum( x1 * A.load(i,j ) );
2259  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) );
2260  }
2261  }
2262 
// At most one column is left after the groups of two.
2263  if( j < N )
2264  {
2265  const size_t ibegin( ( IsLower<MT1>::value )
2266  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
2267  :( 0UL ) );
2268  const size_t iend( ( IsUpper<MT1>::value )
2269  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
2270  :( M ) );
2271  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2272 
2273  size_t i( ibegin );
2274 
2275  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
2276  const size_t i1( i+IT::size );
2277  const size_t i2( i+IT::size*2UL );
2278  const size_t i3( i+IT::size*3UL );
2279  const IntrinsicType x1( x.load(i ) );
2280  const IntrinsicType x2( x.load(i1) );
2281  const IntrinsicType x3( x.load(i2) );
2282  const IntrinsicType x4( x.load(i3) );
2283  y[j] -= sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) );
2284  }
2285 
2286  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
2287  const size_t i1( i+IT::size );
2288  const IntrinsicType x1( x.load(i ) );
2289  const IntrinsicType x2( x.load(i1) );
2290  y[j] -= sum( x1 * A.load(i,j) + x2 * A.load(i1,j) );
2291  }
2292 
2293  if( i < iend ) {
2294  const IntrinsicType x1( x.load(i) );
2295  y[j] -= sum( x1 * A.load(i,j) );
2296  }
2297  }
2298  }
2300  //**********************************************************************************************
2301 
2302  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Default variant of the BLAS-based subtraction assignment kernel.
//
// Selected through EnableIf when UseDefaultKernel<VT1,VT2,MT1> holds. No BLAS routine is
// called here; the arguments are forwarded unchanged to selectLargeSubAssignKernel().
//
// y: target vector (passed by non-const reference).
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2316  template< typename VT1 // Type of the left-hand side target vector
2317  , typename VT2 // Type of the left-hand side vector operand
2318  , typename MT1 > // Type of the right-hand side matrix operand
2319  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
2320  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2321  {
2322  selectLargeSubAssignKernel( y, x, A );
2323  }
2325  //**********************************************************************************************
2326 
2327  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
2328 #if BLAZE_BLAS_MODE
2329 
// Single precision BLAS variant of the subtraction assignment kernel.
//
// Selected through EnableIf when UseSinglePrecisionKernel<VT1,VT2,MT1> holds; only
// compiled when BLAZE_BLAS_MODE is non-zero (see the enclosing #if).
//
// y: target vector.
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2342  template< typename VT1 // Type of the left-hand side target vector
2343  , typename VT2 // Type of the left-hand side vector operand
2344  , typename MT1 > // Type of the right-hand side matrix operand
2345  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
2346  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2347  {
2348  if( IsTriangular<MT1>::value ) {
// Triangular path: work on a copy of x, hand it to strmv() together with the lower/upper
// flag derived from IsLower<MT1>, then subtract the result from y.
// NOTE(review): presumably strmv() overwrites tmp with tmp * A - confirm in the wrapper.
2349  typename VT1::ResultType tmp( x );
2350  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2351  subAssign( y, tmp );
2352  }
2353  else {
// General path: single sgemv() call with the scalars -1 and 1.
// NOTE(review): presumably this evaluates y = 1*y - 1*(x*A) - confirm argument order.
2354  sgemv( y, x, A, -1.0F, 1.0F );
2355  }
2356  }
2358 #endif
2359  //**********************************************************************************************
2360 
2361  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
2362 #if BLAZE_BLAS_MODE
2363 
// Double precision BLAS variant of the subtraction assignment kernel.
//
// Selected through EnableIf when UseDoublePrecisionKernel<VT1,VT2,MT1> holds; only
// compiled when BLAZE_BLAS_MODE is non-zero (see the enclosing #if).
//
// y: target vector.
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2376  template< typename VT1 // Type of the left-hand side target vector
2377  , typename VT2 // Type of the left-hand side vector operand
2378  , typename MT1 > // Type of the right-hand side matrix operand
2379  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
2380  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2381  {
2382  if( IsTriangular<MT1>::value ) {
// Triangular path: work on a copy of x, hand it to dtrmv() together with the lower/upper
// flag derived from IsLower<MT1>, then subtract the result from y.
// NOTE(review): presumably dtrmv() overwrites tmp with tmp * A - confirm in the wrapper.
2383  typename VT1::ResultType tmp( x );
2384  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2385  subAssign( y, tmp );
2386  }
2387  else {
// General path: single dgemv() call with the scalars -1 and 1.
// NOTE(review): presumably this evaluates y = 1*y - 1*(x*A) - confirm argument order.
2388  dgemv( y, x, A, -1.0, 1.0 );
2389  }
2390  }
2392 #endif
2393  //**********************************************************************************************
2394 
2395  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
2396 #if BLAZE_BLAS_MODE
2397 
// Single precision complex BLAS variant of the subtraction assignment kernel.
//
// Selected through EnableIf when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; only
// compiled when BLAZE_BLAS_MODE is non-zero (see the enclosing #if).
//
// y: target vector.
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2410  template< typename VT1 // Type of the left-hand side target vector
2411  , typename VT2 // Type of the left-hand side vector operand
2412  , typename MT1 > // Type of the right-hand side matrix operand
2413  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2414  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2415  {
2416  if( IsTriangular<MT1>::value ) {
// Triangular path: work on a copy of x, hand it to ctrmv() together with the lower/upper
// flag derived from IsLower<MT1>, then subtract the result from y.
// NOTE(review): presumably ctrmv() overwrites tmp with tmp * A - confirm in the wrapper.
2417  typename VT1::ResultType tmp( x );
2418  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2419  subAssign( y, tmp );
2420  }
2421  else {
// General path: single cgemv() call with the complex scalars (-1,0) and (1,0).
// NOTE(review): presumably this evaluates y = 1*y - 1*(x*A) - confirm argument order.
2422  cgemv( y, x, A, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2423  }
2424  }
2426 #endif
2427  //**********************************************************************************************
2428 
2429  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
2430 #if BLAZE_BLAS_MODE
2431 
// Double precision complex BLAS variant of the subtraction assignment kernel.
//
// Selected through EnableIf when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; only
// compiled when BLAZE_BLAS_MODE is non-zero (see the enclosing #if).
//
// y: target vector.
// x: left-hand side vector operand.
// A: right-hand side matrix operand.
2444  template< typename VT1 // Type of the left-hand side target vector
2445  , typename VT2 // Type of the left-hand side vector operand
2446  , typename MT1 > // Type of the right-hand side matrix operand
2447  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2448  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2449  {
2450  if( IsTriangular<MT1>::value ) {
// Triangular path: work on a copy of x, hand it to ztrmv() together with the lower/upper
// flag derived from IsLower<MT1>, then subtract the result from y.
// NOTE(review): presumably ztrmv() overwrites tmp with tmp * A - confirm in the wrapper.
2451  typename VT1::ResultType tmp( x );
2452  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2453  subAssign( y, tmp );
2454  }
2455  else {
// General path: single zgemv() call with the complex scalars (-1,0) and (1,0).
// NOTE(review): presumably this evaluates y = 1*y - 1*(x*A) - confirm argument order.
2456  zgemv( y, x, A, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2457  }
2458  }
2460 #endif
2461  //**********************************************************************************************
2462 
2463  //**Subtraction assignment to sparse vectors****************************************************
2464  // No special implementation for the subtraction assignment to sparse vectors.
2465  //**********************************************************************************************
2466 
2467  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of the vector/matrix product to a dense vector.
//
// The product expression is first evaluated into a temporary of type ResultType (wrapped
// in serial()), and the temporary is then passed on to multAssign() for the target.
//
// lhs: target dense vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
//
// NOTE(review): the embedded line numbers skip 2483 and 2485-2487 inside this body, so
// statements of the original source appear to be missing from this listing.
2480  template< typename VT1 > // Type of the target dense vector
2481  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2482  {
2484 
2488 
2489  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2490 
2491  const ResultType tmp( serial( rhs ) );
2492  multAssign( ~lhs, tmp );
2493  }
2495  //**********************************************************************************************
2496 
2497  //**Multiplication assignment to sparse vectors*************************************************
2498  // No special implementation for the multiplication assignment to sparse vectors.
2499  //**********************************************************************************************
2500 
2501  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of the vector/matrix product to a dense vector.
//
// Enabled through EnableIf when UseSMPAssign<VT1> holds.
//
// lhs: target dense vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
2516  template< typename VT1 > // Type of the target dense vector
2517  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2518  smpAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2519  {
2521 
2522  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2523 
// A matrix without rows contributes nothing: the target is reset and nothing is computed.
2524  if( rhs.mat_.rows() == 0UL ) {
2525  reset( ~lhs );
2526  return;
2527  }
// A matrix without columns leaves the target untouched.
2528  else if( rhs.mat_.columns() == 0UL ) {
2529  return;
2530  }
2531 
2532  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2533  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2534 
2535  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2536  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2537  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2538  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2539 
// Re-dispatch the SMP assignment on the product of the evaluated operands.
2540  smpAssign( ~lhs, x * A );
2541  }
2543  //**********************************************************************************************
2544 
2545  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of the vector/matrix product to a sparse vector.
//
// Enabled through EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into
// a temporary of type ResultType, which is then SMP-assigned to the target.
//
// lhs: target sparse vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
2560  template< typename VT1 > // Type of the target sparse vector
2561  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2562  smpAssign( SparseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2563  {
2565 
2569 
2570  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2571 
2572  const ResultType tmp( rhs );
2573  smpAssign( ~lhs, tmp );
2574  }
2576  //**********************************************************************************************
2577 
2578  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of the vector/matrix product to a dense vector.
//
// Enabled through EnableIf when UseSMPAssign<VT1> holds.
//
// lhs: target dense vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
2593  template< typename VT1 > // Type of the target dense vector
2594  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2595  smpAddAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2596  {
2598 
2599  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2600 
// An empty matrix (no rows or no columns) leaves the target untouched.
2601  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2602  return;
2603  }
2604 
2605  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2606  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2607 
2608  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2609  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2610  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2611  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2612 
// Re-dispatch the SMP addition assignment on the product of the evaluated operands.
2613  smpAddAssign( ~lhs, x * A );
2614  }
2616  //**********************************************************************************************
2617 
2618  //**SMP addition assignment to sparse vectors***************************************************
2619  // No special implementation for the SMP addition assignment to sparse vectors.
2620  //**********************************************************************************************
2621 
2622  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of the vector/matrix product to a dense vector.
//
// Enabled through EnableIf when UseSMPAssign<VT1> holds.
//
// lhs: target dense vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
2637  template< typename VT1 > // Type of the target dense vector
2638  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2639  smpSubAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2640  {
2642 
2643  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2644 
// An empty matrix (no rows or no columns) leaves the target untouched.
2645  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2646  return;
2647  }
2648 
2649  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2650  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2651 
2652  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2653  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2654  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2655  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2656 
// Re-dispatch the SMP subtraction assignment on the product of the evaluated operands.
2657  smpSubAssign( ~lhs, x * A );
2658  }
2660  //**********************************************************************************************
2661 
2662  //**SMP subtraction assignment to sparse vectors************************************************
2663  // No special implementation for the SMP subtraction assignment to sparse vectors.
2664  //**********************************************************************************************
2665 
2666  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of the vector/matrix product to a dense vector.
//
// Enabled through EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into
// a temporary of type ResultType, which is then passed to smpMultAssign() for the target.
//
// lhs: target dense vector; its size must equal rhs.size() (internal assertion).
// rhs: the multiplication expression to evaluate.
2681  template< typename VT1 > // Type of the target dense vector
2682  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2683  smpMultAssign( DenseVector<VT1,true>& lhs, const TDVecTDMatMultExpr& rhs )
2684  {
2686 
2690 
2691  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2692 
2693  const ResultType tmp( rhs );
2694  smpMultAssign( ~lhs, tmp );
2695  }
2697  //**********************************************************************************************
2698 
2699  //**SMP multiplication assignment to sparse vectors*********************************************
2700  // No special implementation for the SMP multiplication assignment to sparse vectors.
2701  //**********************************************************************************************
2702 
2703  //**Compile time checks*************************************************************************
2711  //**********************************************************************************************
2712 };
2713 //*************************************************************************************************
2714 
2715 
2716 
2717 
2718 //=================================================================================================
2719 //
2720 // DVECSCALARMULTEXPR SPECIALIZATION
2721 //
2722 //=================================================================================================
2723 
2724 //*************************************************************************************************
2732 template< typename VT // Type of the left-hand side dense vector
2733  , typename MT // Type of the right-hand side dense matrix
2734  , typename ST > // Type of the side scalar value
2735 class DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >
2736  : public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
2737  , private VecScalarMultExpr
2738  , private Computation
2739 {
2740  private:
2741  //**Type definitions****************************************************************************
2742  typedef TDVecTDMatMultExpr<VT,MT> VMM; // Type of the wrapped vector/matrix multiplication expression.
2743  typedef typename VMM::ResultType RES; // Result type of the wrapped multiplication expression.
2744  typedef typename VT::ResultType VRT; // Result type of the vector operand type VT.
2745  typedef typename MT::ResultType MRT; // Result type of the matrix operand type MT.
2746  typedef typename VRT::ElementType VET; // Element type of the vector result type.
2747  typedef typename MRT::ElementType MET; // Element type of the matrix result type.
2748  typedef typename VT::CompositeType VCT; // Composite type of the vector operand type VT.
2749  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand type MT.
2750  //**********************************************************************************************
2751 
2752  //**********************************************************************************************
// Compile-time switch: non-zero when VT is a computation (IsComputation) or requires an
// intermediate evaluation (RequiresEvaluation). It selects the operand type LT below.
2754  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2755  //**********************************************************************************************
2756 
2757  //**********************************************************************************************
// Compile-time switch: non-zero when MT requires an intermediate evaluation, or when MT
// is a computation whose element type equals the vector element type and is BLAS
// compatible. It selects the operand type RT below.
2759  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2760  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2761  //**********************************************************************************************
2762 
2763  //**********************************************************************************************
2765 
// Helper trait for enabling the SMP assignment overloads.
// value is non-zero when the target type T1 is SMP-assignable and at least one of the
// two operands has to be evaluated (evaluateVector or evaluateMatrix).
2768  template< typename T1 >
2769  struct UseSMPAssign {
2770  enum { value = T1::smpAssignable && ( evaluateVector || evaluateMatrix ) };
2771  };
2772  //**********************************************************************************************
2773 
2774  //**********************************************************************************************
2776 
// Helper trait selecting the single precision BLAS kernel.
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type.
// value is non-zero only if BLAS mode is active, T1 offers mutable and T2/T3 constant
// data access, T3 is not diagonal, all three types are vectorizable with element types
// recognised by IsFloat, and T4 is not a complex type.
2779  template< typename T1, typename T2, typename T3, typename T4 >
2780  struct UseSinglePrecisionKernel {
2781  enum { value = BLAZE_BLAS_MODE &&
2782  HasMutableDataAccess<T1>::value &&
2783  HasConstDataAccess<T2>::value &&
2784  HasConstDataAccess<T3>::value &&
2785  !IsDiagonal<T3>::value &&
2786  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2787  IsFloat<typename T1::ElementType>::value &&
2788  IsFloat<typename T2::ElementType>::value &&
2789  IsFloat<typename T3::ElementType>::value &&
2790  !IsComplex<T4>::value };
2791  };
2792  //**********************************************************************************************
2793 
2794  //**********************************************************************************************
2796 
// Helper trait selecting the double precision BLAS kernel.
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type.
// value is non-zero only if BLAS mode is active, T1 offers mutable and T2/T3 constant
// data access, T3 is not diagonal, all three types are vectorizable with element types
// recognised by IsDouble, and T4 is not a complex type.
2799  template< typename T1, typename T2, typename T3, typename T4 >
2800  struct UseDoublePrecisionKernel {
2801  enum { value = BLAZE_BLAS_MODE &&
2802  HasMutableDataAccess<T1>::value &&
2803  HasConstDataAccess<T2>::value &&
2804  HasConstDataAccess<T3>::value &&
2805  !IsDiagonal<T3>::value &&
2806  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2807  IsDouble<typename T1::ElementType>::value &&
2808  IsDouble<typename T2::ElementType>::value &&
2809  IsDouble<typename T3::ElementType>::value &&
2810  !IsComplex<T4>::value };
2811  };
2812  //**********************************************************************************************
2813 
2814  //**********************************************************************************************
2816 
// Helper trait selecting the single precision complex BLAS kernel.
// T1: target vector type, T2: vector operand type, T3: matrix operand type.
// value is non-zero only if BLAS mode is active, T1 offers mutable and T2/T3 constant
// data access, T3 is not diagonal, all three types are vectorizable, and all three
// element types are exactly complex<float>.
2819  template< typename T1, typename T2, typename T3 >
2820  struct UseSinglePrecisionComplexKernel {
2821  typedef complex<float> Type;
2822  enum { value = BLAZE_BLAS_MODE &&
2823  HasMutableDataAccess<T1>::value &&
2824  HasConstDataAccess<T2>::value &&
2825  HasConstDataAccess<T3>::value &&
2826  !IsDiagonal<T3>::value &&
2827  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2828  IsSame<typename T1::ElementType,Type>::value &&
2829  IsSame<typename T2::ElementType,Type>::value &&
2830  IsSame<typename T3::ElementType,Type>::value };
2831  };
2832  //**********************************************************************************************
2833 
2834  //**********************************************************************************************
2836 
// Helper trait selecting the double precision complex BLAS kernel.
// T1: target vector type, T2: vector operand type, T3: matrix operand type.
// value is non-zero only if BLAS mode is active, T1 offers mutable and T2/T3 constant
// data access, T3 is not diagonal, all three types are vectorizable, and all three
// element types are exactly complex<double>.
2839  template< typename T1, typename T2, typename T3 >
2840  struct UseDoublePrecisionComplexKernel {
2841  typedef complex<double> Type;
2842  enum { value = BLAZE_BLAS_MODE &&
2843  HasMutableDataAccess<T1>::value &&
2844  HasConstDataAccess<T2>::value &&
2845  HasConstDataAccess<T3>::value &&
2846  !IsDiagonal<T3>::value &&
2847  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2848  IsSame<typename T1::ElementType,Type>::value &&
2849  IsSame<typename T2::ElementType,Type>::value &&
2850  IsSame<typename T3::ElementType,Type>::value };
2851  };
2852  //**********************************************************************************************
2853 
2854  //**********************************************************************************************
2856 
// Helper trait selecting the default (non-BLAS) kernel.
// value is non-zero when BLAS mode is disabled, or when none of the four BLAS kernel
// traits (single/double precision, real/complex) applies to the given types.
2858  template< typename T1, typename T2, typename T3, typename T4 >
2859  struct UseDefaultKernel {
2860  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2861  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2862  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2863  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2864  };
2865  //**********************************************************************************************
2866 
2867  //**********************************************************************************************
2869 
// Helper trait selecting the vectorized default kernel.
// T1: target vector type, T2: vector operand type, T3: matrix operand type,
// T4: scalar type.
// value is non-zero only if T3 is not diagonal, all three types are vectorizable, the
// element types of T1, T2 and T3 as well as T4 are all the same type, and IntrinsicTrait
// reports both addition and multiplication for that element type.
2872  template< typename T1, typename T2, typename T3, typename T4 >
2873  struct UseVectorizedDefaultKernel {
2874  enum { value = !IsDiagonal<T3>::value &&
2875  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2876  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2877  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2878  IsSame<typename T1::ElementType,T4>::value &&
2879  IntrinsicTrait<typename T1::ElementType>::addition &&
2880  IntrinsicTrait<typename T1::ElementType>::multiplication };
2881  };
2882  //**********************************************************************************************
2883 
2884  public:
2885  //**Type definitions****************************************************************************
2886  typedef DVecScalarMultExpr<VMM,ST,true> This; // Type of this DVecScalarMultExpr instance.
2887  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type as determined by MultTrait<RES,ST>.
2888  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type.
2889  typedef typename ResultType::ElementType ElementType; // Element type of the result type.
2890  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type associated with ElementType.
2891  typedef const ElementType ReturnType; // Type returned by the subscript operator.
2892  typedef const ResultType CompositeType; // Composite type of this expression (a const ResultType).
2893 
// Type of the left-hand side operand: the wrapped vector/matrix multiplication expression.
2895  typedef const TDVecTDMatMultExpr<VT,MT> LeftOperand;
2896 
// Type of the right-hand side operand: the scalar.
2898  typedef ST RightOperand;
2899 
// Type used for the vector operand inside the kernels: the evaluated result type if
// evaluateVector is set, the composite type otherwise.
2901  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
2902 
// Type used for the matrix operand inside the kernels: the evaluated result type if
// evaluateMatrix is set, the composite type otherwise.
2904  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
2905  //**********************************************************************************************
2906 
2907  //**Compilation flags***************************************************************************
// Vectorization flag: set when MT is not diagonal, both operand types are vectorizable,
// vector element type, matrix element type and scalar type are identical, and
// IntrinsicTrait reports addition and multiplication for that element type.
2909  enum { vectorizable = !IsDiagonal<MT>::value &&
2910  VT::vectorizable && MT::vectorizable &&
2911  IsSame<VET,MET>::value &&
2912  IsSame<VET,ST>::value &&
2913  IntrinsicTrait<VET>::addition &&
2914  IntrinsicTrait<VET>::multiplication };
2915 
// SMP flag: set only when neither operand needs an intermediate evaluation and both
// operand types are themselves SMP-assignable.
2917  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2918  !evaluateMatrix && MT::smpAssignable };
2919  //**********************************************************************************************
2920 
2921  //**Constructor*********************************************************************************
// Constructs the scaled expression from its two operands.
// vector: the vector/matrix multiplication expression to be scaled.
// scalar: the scalar factor.
2927  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
2928  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2929  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2930  {}
2931  //**********************************************************************************************
2932 
2933  //**Subscript operator**************************************************************************
// Returns element `index` of the wrapped expression multiplied by the scalar.
// index must be smaller than the size of the wrapped expression (internal assertion).
2939  inline ReturnType operator[]( size_t index ) const {
2940  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2941  return vector_[index] * scalar_;
2942  }
2943  //**********************************************************************************************
2944 
2945  //**Size function*******************************************************************************
// Returns the size of the wrapped vector/matrix multiplication expression.
2950  inline size_t size() const {
2951  return vector_.size();
2952  }
2953  //**********************************************************************************************
2954 
2955  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped multiplication expression.
2960  inline LeftOperand leftOperand() const {
2961  return vector_;
2962  }
2963  //**********************************************************************************************
2964 
2965  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
2970  inline RightOperand rightOperand() const {
2971  return scalar_;
2972  }
2973  //**********************************************************************************************
2974 
2975  //**********************************************************************************************
// Alias query: delegates to canAlias() of the wrapped multiplication expression.
// alias: address to check against.
2981  template< typename T >
2982  inline bool canAlias( const T* alias ) const {
2983  return vector_.canAlias( alias );
2984  }
2985  //**********************************************************************************************
2986 
2987  //**********************************************************************************************
// Alias query: delegates to isAliased() of the wrapped multiplication expression.
// alias: address to check against.
2993  template< typename T >
2994  inline bool isAliased( const T* alias ) const {
2995  return vector_.isAliased( alias );
2996  }
2997  //**********************************************************************************************
2998 
2999  //**********************************************************************************************
// Alignment query: delegates to isAligned() of the wrapped multiplication expression.
3004  inline bool isAligned() const {
3005  return vector_.isAligned();
3006  }
3007  //**********************************************************************************************
3008 
3009  //**********************************************************************************************
// Returns whether this expression should take part in an SMP assignment.
//
// The result is true only if both of the following hold:
//  - BLAS is not running in parallel itself, or the matrix operand is a computation that
//    is not evaluated up front, or the matrix has fewer than TDVECTDMATMULT_THRESHOLD
//    elements (rows * columns), and
//  - the size of the expression exceeds SMP_TDVECTDMATMULT_THRESHOLD.
//
// NOTE(review): source line 3019 was missing from this listing, which left the return
// expression ending in a dangling '&&'. The final operand below is restored to match the
// upstream implementation - confirm against the original header.
3014  inline bool canSMPAssign() const {
3015  typename VMM::RightOperand A( vector_.rightOperand() );
3016  return ( !BLAZE_BLAS_IS_PARALLEL ||
3017  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3018  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) ) &&
3019  ( size() > SMP_TDVECTDMATMULT_THRESHOLD );
3020  }
3021  //**********************************************************************************************
3022 
3023  private:
3024  //**Member variables****************************************************************************
3025  LeftOperand vector_; // Left-hand side operand: the wrapped vector/matrix multiplication expression.
3026  RightOperand scalar_; // Right-hand side operand: the scalar factor.
3027  //**********************************************************************************************
3028 
3029  //**Assignment to dense vectors*****************************************************************
3041  template< typename VT1 // Type of the target dense vector
3042  , bool TF > // Transpose flag of the target dense vector
3043  friend inline void assign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
3044  {
3046 
3047  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3048 
3049  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3050  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3051 
3052  if( right.rows() == 0UL ) {
3053  reset( ~lhs );
3054  return;
3055  }
3056  else if( right.columns() == 0UL ) {
3057  return;
3058  }
3059 
3060  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
3061  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3062 
3063  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3064  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3065  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3066  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3067 
3068  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
3069  }
3070  //**********************************************************************************************
3071 
3072  //**Assignment to dense vectors (kernel selection)**********************************************
3083  template< typename VT1 // Type of the left-hand side target vector
3084  , typename VT2 // Type of the left-hand side vector operand
3085  , typename MT1 // Type of the right-hand side matrix operand
3086  , typename ST2 > // Type of the scalar value
3087  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3088  {
3089  if( ( IsDiagonal<MT1>::value ) ||
3090  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3091  ( A.rows() * A.columns() < TDVECTDMATMULT_THRESHOLD ) )
3092  selectSmallAssignKernel( y, x, A, scalar );
3093  else
3094  selectBlasAssignKernel( y, x, A, scalar );
3095  }
3096  //**********************************************************************************************
3097 
3098  //**Default assignment to dense vectors*********************************************************
// Default (non-specialized) assignment kernel.
//
// Rebuilds the scaled product expression from the evaluated operands and hands
// it to the assign() member of the target vector.
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline void selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   y.assign( x * A * scalar );
}
3120  //**********************************************************************************************
3121 
3122  //**Default assignment to dense vectors (small matrices)****************************************
3136  template< typename VT1 // Type of the left-hand side target vector
3137  , typename VT2 // Type of the left-hand side vector operand
3138  , typename MT1 // Type of the right-hand side matrix operand
3139  , typename ST2 > // Type of the scalar value
3140  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3141  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3142  {
3143  selectDefaultAssignKernel( y, x, A, scalar );
3144  }
3145  //**********************************************************************************************
3146 
3147  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized small-matrix kernel for the assignment y = ( x * A ) * scalar.
//
// The columns of A are processed in groups of 8, 4, 3 and 2, followed by a single
// remaining column. For every column of a group one intrinsic accumulator collects
// x.load(i) * A.load(i,column) over the row range [ibegin,iend) in steps of IT::size;
// the horizontal sum of the accumulator times 'scalar' is then stored in y.
//
// Row range: for lower matrix types the range starts at the first column of the
// group (one further for strictly lower types), masked with size_t(-IT::size), which
// rounds down to a multiple of IT::size (assuming IT::size is a power of two). For
// upper matrix types the range ends at the last column of the group (one earlier for
// strictly upper types). Otherwise all M rows are traversed.
//
// NOTE(review): the xmm accumulators are default-constructed and used without explicit
// initialization - presumably IntrinsicType default-constructs to zero; confirm.
3162  template< typename VT1 // Type of the left-hand side target vector
3163  , typename VT2 // Type of the left-hand side vector operand
3164  , typename MT1 // Type of the right-hand side matrix operand
3165  , typename ST2 > // Type of the scalar value
3166  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3167  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3168  {
3169  typedef IntrinsicTrait<ElementType> IT;
3170 
3171  const size_t M( A.rows() );
3172  const size_t N( A.columns() );
3173 
3174  size_t j( 0UL );
3175 
// Groups of eight columns
3176  for( ; (j+8UL) <= N; j+=8UL )
3177  {
3178  const size_t ibegin( ( IsLower<MT1>::value )
3179  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3180  :( 0UL ) );
3181  const size_t iend( ( IsUpper<MT1>::value )
3182  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
3183  :( M ) );
3184  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3185 
3186  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3187 
3188  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3189  const IntrinsicType x1( x.load(i) );
3190  xmm1 = xmm1 + x1 * A.load(i,j );
3191  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3192  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3193  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3194  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
3195  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
3196  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
3197  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
3198  }
3199 
3200  y[j ] = sum( xmm1 ) * scalar;
3201  y[j+1UL] = sum( xmm2 ) * scalar;
3202  y[j+2UL] = sum( xmm3 ) * scalar;
3203  y[j+3UL] = sum( xmm4 ) * scalar;
3204  y[j+4UL] = sum( xmm5 ) * scalar;
3205  y[j+5UL] = sum( xmm6 ) * scalar;
3206  y[j+6UL] = sum( xmm7 ) * scalar;
3207  y[j+7UL] = sum( xmm8 ) * scalar;
3208  }
3209 
// Groups of four columns
3210  for( ; (j+4UL) <= N; j+=4UL )
3211  {
3212  const size_t ibegin( ( IsLower<MT1>::value )
3213  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3214  :( 0UL ) );
3215  const size_t iend( ( IsUpper<MT1>::value )
3216  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
3217  :( M ) );
3218  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3219 
3220  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3221 
3222  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3223  const IntrinsicType x1( x.load(i) );
3224  xmm1 = xmm1 + x1 * A.load(i,j );
3225  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3226  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3227  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3228  }
3229 
3230  y[j ] = sum( xmm1 ) * scalar;
3231  y[j+1UL] = sum( xmm2 ) * scalar;
3232  y[j+2UL] = sum( xmm3 ) * scalar;
3233  y[j+3UL] = sum( xmm4 ) * scalar;
3234  }
3235 
// Groups of three columns
3236  for( ; (j+3UL) <= N; j+=3UL )
3237  {
3238  const size_t ibegin( ( IsLower<MT1>::value )
3239  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3240  :( 0UL ) );
3241  const size_t iend( ( IsUpper<MT1>::value )
3242  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
3243  :( M ) );
3244  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3245 
3246  IntrinsicType xmm1, xmm2, xmm3;
3247 
3248  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3249  const IntrinsicType x1( x.load(i) );
3250  xmm1 = xmm1 + x1 * A.load(i,j );
3251  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3252  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3253  }
3254 
3255  y[j ] = sum( xmm1 ) * scalar;
3256  y[j+1UL] = sum( xmm2 ) * scalar;
3257  y[j+2UL] = sum( xmm3 ) * scalar;
3258  }
3259 
// Groups of two columns
3260  for( ; (j+2UL) <= N; j+=2UL )
3261  {
3262  const size_t ibegin( ( IsLower<MT1>::value )
3263  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3264  :( 0UL ) );
3265  const size_t iend( ( IsUpper<MT1>::value )
3266  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
3267  :( M ) );
3268  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3269 
3270  IntrinsicType xmm1, xmm2;
3271 
3272  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3273  const IntrinsicType x1( x.load(i) );
3274  xmm1 = xmm1 + x1 * A.load(i,j );
3275  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3276  }
3277 
3278  y[j ] = sum( xmm1 ) * scalar;
3279  y[j+1UL] = sum( xmm2 ) * scalar;
3280  }
3281 
// At most one column is left at this point
3282  if( j < N )
3283  {
3284  const size_t ibegin( ( IsLower<MT1>::value )
3285  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3286  :( 0UL ) );
3287  const size_t iend( ( IsUpper<MT1>::value )
3288  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
3289  :( M ) );
3290  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3291 
3292  IntrinsicType xmm1;
3293 
3294  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3295  xmm1 = xmm1 + A.load(i,j) * x.load(i);
3296  }
3297 
3298  y[j] = sum( xmm1 ) * scalar;
3299  }
3300  }
3301  //**********************************************************************************************
3302 
3303  //**Default assignment to dense vectors (large matrices)****************************************
3317  template< typename VT1 // Type of the left-hand side target vector
3318  , typename VT2 // Type of the left-hand side vector operand
3319  , typename MT1 // Type of the right-hand side matrix operand
3320  , typename ST2 > // Type of the scalar value
3321  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3322  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3323  {
3324  selectDefaultAssignKernel( y, x, A, scalar );
3325  }
3326  //**********************************************************************************************
3327 
3328  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized large-matrix kernel for the assignment y = ( x * A ) * scalar.
//
// The target y is reset first. The columns of A are then processed in groups of
// 8, 4 and 2, followed by a single remaining column. Within a group the row range
// [ibegin,iend) is traversed with the loop unrolled four times (4*IT::size rows per
// step), then twice (2*IT::size rows per step), and finally one more intrinsic vector
// if rows are left. The horizontal sums are added directly to the elements of y, and
// each element is multiplied by 'scalar' once its column group is complete.
//
// Row range: for lower matrix types the range starts at the first column of the
// group (one further for strictly lower types), masked with size_t(-IT::size), which
// rounds down to a multiple of IT::size (assuming IT::size is a power of two). For
// upper matrix types the range ends at the last column of the group (one earlier for
// strictly upper types). Otherwise all M rows are traversed.
3343  template< typename VT1 // Type of the left-hand side target vector
3344  , typename VT2 // Type of the left-hand side vector operand
3345  , typename MT1 // Type of the right-hand side matrix operand
3346  , typename ST2 > // Type of the scalar value
3347  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3348  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3349  {
3350  typedef IntrinsicTrait<ElementType> IT;
3351 
3352  const size_t M( A.rows() );
3353  const size_t N( A.columns() );
3354 
// The loops below accumulate into y with +=, so y is cleared first
3355  reset( y );
3356 
3357  size_t j( 0UL );
3358 
// Groups of eight columns
3359  for( ; (j+8UL) <= N; j+=8UL )
3360  {
3361  const size_t ibegin( ( IsLower<MT1>::value )
3362  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3363  :( 0UL ) );
3364  const size_t iend( ( IsUpper<MT1>::value )
3365  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
3366  :( M ) );
3367  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3368 
3369  size_t i( ibegin );
3370 
// Four intrinsic vectors of rows per iteration
3371  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
3372  const size_t i1( i+IT::size );
3373  const size_t i2( i+IT::size*2UL );
3374  const size_t i3( i+IT::size*3UL );
3375  const IntrinsicType x1( x.load(i ) );
3376  const IntrinsicType x2( x.load(i1) );
3377  const IntrinsicType x3( x.load(i2) );
3378  const IntrinsicType x4( x.load(i3) );
3379  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
3380  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
3381  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
3382  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
3383  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) );
3384  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) );
3385  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) );
3386  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) );
3387  }
3388 
// Two intrinsic vectors of rows per iteration
3389  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
3390  const size_t i1( i+IT::size );
3391  const IntrinsicType x1( x.load(i ) );
3392  const IntrinsicType x2( x.load(i1) );
3393  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
3394  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
3395  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
3396  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
3397  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) );
3398  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) );
3399  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) );
3400  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) );
3401  }
3402 
// One final intrinsic vector of rows, if any are left
3403  if( i < iend ) {
3404  const IntrinsicType x1( x.load(i) );
3405  y[j ] += sum( x1 * A.load(i,j ) );
3406  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
3407  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
3408  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
3409  y[j+4UL] += sum( x1 * A.load(i,j+4UL) );
3410  y[j+5UL] += sum( x1 * A.load(i,j+5UL) );
3411  y[j+6UL] += sum( x1 * A.load(i,j+6UL) );
3412  y[j+7UL] += sum( x1 * A.load(i,j+7UL) );
3413  }
3414 
// Apply the scalar once per finished element
3415  y[j ] *= scalar;
3416  y[j+1UL] *= scalar;
3417  y[j+2UL] *= scalar;
3418  y[j+3UL] *= scalar;
3419  y[j+4UL] *= scalar;
3420  y[j+5UL] *= scalar;
3421  y[j+6UL] *= scalar;
3422  y[j+7UL] *= scalar;
3423  }
3424 
// Groups of four columns
3425  for( ; (j+4UL) <= N; j+=4UL )
3426  {
3427  const size_t ibegin( ( IsLower<MT1>::value )
3428  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3429  :( 0UL ) );
3430  const size_t iend( ( IsUpper<MT1>::value )
3431  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
3432  :( M ) );
3433  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3434 
3435  size_t i( ibegin );
3436 
3437  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
3438  const size_t i1( i+IT::size );
3439  const size_t i2( i+IT::size*2UL );
3440  const size_t i3( i+IT::size*3UL );
3441  const IntrinsicType x1( x.load(i ) );
3442  const IntrinsicType x2( x.load(i1) );
3443  const IntrinsicType x3( x.load(i2) );
3444  const IntrinsicType x4( x.load(i3) );
3445  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
3446  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
3447  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) );
3448  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) );
3449  }
3450 
3451  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
3452  const size_t i1( i+IT::size );
3453  const IntrinsicType x1( x.load(i ) );
3454  const IntrinsicType x2( x.load(i1) );
3455  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
3456  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
3457  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) );
3458  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) );
3459  }
3460 
3461  if( i < iend ) {
3462  const IntrinsicType x1( x.load(i) );
3463  y[j ] += sum( x1 * A.load(i,j ) );
3464  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
3465  y[j+2UL] += sum( x1 * A.load(i,j+2UL) );
3466  y[j+3UL] += sum( x1 * A.load(i,j+3UL) );
3467  }
3468 
3469  y[j ] *= scalar;
3470  y[j+1UL] *= scalar;
3471  y[j+2UL] *= scalar;
3472  y[j+3UL] *= scalar;
3473  }
3474 
// Groups of two columns
3475  for( ; (j+2UL) <= N; j+=2UL )
3476  {
3477  const size_t ibegin( ( IsLower<MT1>::value )
3478  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3479  :( 0UL ) );
3480  const size_t iend( ( IsUpper<MT1>::value )
3481  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
3482  :( M ) );
3483  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3484 
3485  size_t i( ibegin );
3486 
3487  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
3488  const size_t i1( i+IT::size );
3489  const size_t i2( i+IT::size*2UL );
3490  const size_t i3( i+IT::size*3UL );
3491  const IntrinsicType x1( x.load(i ) );
3492  const IntrinsicType x2( x.load(i1) );
3493  const IntrinsicType x3( x.load(i2) );
3494  const IntrinsicType x4( x.load(i3) );
3495  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) );
3496  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) );
3497  }
3498 
3499  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
3500  const size_t i1( i+IT::size );
3501  const IntrinsicType x1( x.load(i ) );
3502  const IntrinsicType x2( x.load(i1) );
3503  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) );
3504  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) );
3505  }
3506 
3507  if( i < iend ) {
3508  const IntrinsicType x1( x.load(i) );
3509  y[j ] += sum( x1 * A.load(i,j ) );
3510  y[j+1UL] += sum( x1 * A.load(i,j+1UL) );
3511  }
3512 
3513  y[j ] *= scalar;
3514  y[j+1UL] *= scalar;
3515  }
3516 
// At most one column is left at this point
3517  if( j < N )
3518  {
3519  const size_t ibegin( ( IsLower<MT1>::value )
3520  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3521  :( 0UL ) );
3522  const size_t iend( ( IsUpper<MT1>::value )
3523  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
3524  :( M ) );
3525  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3526 
3527  size_t i( ibegin );
3528 
3529  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
3530  const size_t i1( i+IT::size );
3531  const size_t i2( i+IT::size*2UL );
3532  const size_t i3( i+IT::size*3UL );
3533  const IntrinsicType x1( x.load(i ) );
3534  const IntrinsicType x2( x.load(i1) );
3535  const IntrinsicType x3( x.load(i2) );
3536  const IntrinsicType x4( x.load(i3) );
3537  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) );
3538  }
3539 
3540  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
3541  const size_t i1( i+IT::size );
3542  const IntrinsicType x1( x.load(i ) );
3543  const IntrinsicType x2( x.load(i1) );
3544  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) );
3545  }
3546 
3547  if( i < iend ) {
3548  const IntrinsicType x1( x.load(i) );
3549  y[j] += sum( x1 * A.load(i,j) );
3550  }
3551 
3552  y[j] *= scalar;
3553  }
3554  }
3555  //**********************************************************************************************
3556 
3557  //**BLAS-based assignment to dense vectors (default)********************************************
3570  template< typename VT1 // Type of the left-hand side target vector
3571  , typename VT2 // Type of the left-hand side vector operand
3572  , typename MT1 // Type of the right-hand side matrix operand
3573  , typename ST2 > // Type of the scalar value
3574  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3575  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3576  {
3577  selectLargeAssignKernel( y, x, A, scalar );
3578  }
3579  //**********************************************************************************************
3580 
3581  //**BLAS-based assignment to dense vectors (single precision)***********************************
3582 #if BLAZE_BLAS_MODE
3583 
3596  template< typename VT1 // Type of the left-hand side target vector
3597  , typename VT2 // Type of the left-hand side vector operand
3598  , typename MT1 // Type of the right-hand side matrix operand
3599  , typename ST2 > // Type of the scalar value
3600  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3601  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3602  {
3603  if( IsTriangular<MT1>::value ) {
3604  assign( y, scalar * x );
3605  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3606  }
3607  else {
3608  sgemv( y, x, A, scalar, 0.0F );
3609  }
3610  }
3611 #endif
3612  //**********************************************************************************************
3613 
3614  //**BLAS-based assignment to dense vectors (double precision)***********************************
3615 #if BLAZE_BLAS_MODE
3616 
3629  template< typename VT1 // Type of the left-hand side target vector
3630  , typename VT2 // Type of the left-hand side vector operand
3631  , typename MT1 // Type of the right-hand side matrix operand
3632  , typename ST2 > // Type of the scalar value
3633  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3634  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3635  {
3636  if( IsTriangular<MT1>::value ) {
3637  assign( y, scalar * x );
3638  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3639  }
3640  else {
3641  dgemv( y, x, A, scalar, 0.0 );
3642  }
3643  }
3644 #endif
3645  //**********************************************************************************************
3646 
3647  //**BLAS-based assignment to dense vectors (single precision complex)***************************
3648 #if BLAZE_BLAS_MODE
3649 
3663  template< typename VT1 // Type of the left-hand side target vector
3664  , typename VT2 // Type of the left-hand side vector operand
3665  , typename MT1 // Type of the right-hand side matrix operand
3666  , typename ST2 > // Type of the scalar value
3667  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3668  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3669  {
3670  if( IsTriangular<MT1>::value ) {
3671  assign( y, scalar * x );
3672  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3673  }
3674  else {
3675  cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3676  }
3677  }
3678 #endif
3679  //**********************************************************************************************
3680 
3681  //**BLAS-based assignment to dense vectors (double precision complex)***************************
3682 #if BLAZE_BLAS_MODE
3683 
3697  template< typename VT1 // Type of the left-hand side target vector
3698  , typename VT2 // Type of the left-hand side vector operand
3699  , typename MT1 // Type of the right-hand side matrix operand
3700  , typename ST2 > // Type of the scalar value
3701  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3702  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3703  {
3704  if( IsTriangular<MT1>::value ) {
3705  assign( y, scalar * x );
3706  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3707  }
3708  else {
3709  zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3710  }
3711  }
3712 #endif
3713  //**********************************************************************************************
3714 
3715  //**Assignment to sparse vectors****************************************************************
3727  template< typename VT1 // Type of the target sparse vector
3728  , bool TF > // Transpose flag of the target sparse vector
3729  friend inline void assign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
3730  {
3732 
3736 
3737  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3738 
3739  const ResultType tmp( serial( rhs ) );
3740  assign( ~lhs, tmp );
3741  }
3742  //**********************************************************************************************
3743 
3744  //**Addition assignment to dense vectors********************************************************
3756  template< typename VT1 // Type of the target dense vector
3757  , bool TF > // Transpose flag of the target dense vector
3758  friend inline void addAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
3759  {
3761 
3762  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3763 
3764  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3765  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3766 
3767  if( right.rows() == 0UL || right.columns() == 0UL ) {
3768  return;
3769  }
3770 
3771  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
3772  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3773 
3774  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3775  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3776  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3777  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3778 
3779  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
3780  }
3781  //**********************************************************************************************
3782 
3783  //**Addition assignment to dense vectors (kernel selection)*************************************
3794  template< typename VT1 // Type of the left-hand side target vector
3795  , typename VT2 // Type of the left-hand side vector operand
3796  , typename MT1 // Type of the right-hand side matrix operand
3797  , typename ST2 > // Type of the scalar value
3798  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3799  {
3800  if( ( IsDiagonal<MT1>::value ) ||
3801  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3802  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3803  selectSmallAddAssignKernel( y, x, A, scalar );
3804  else
3805  selectBlasAddAssignKernel( y, x, A, scalar );
3806  }
3807  //**********************************************************************************************
3808 
3809  //**Default addition assignment to dense vectors************************************************
// Default (non-specialized) addition assignment kernel.
//
// Rebuilds the scaled product expression from the evaluated operands and hands
// it to the addAssign() member of the target vector.
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline void selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   y.addAssign( x * A * scalar );
}
3831  //**********************************************************************************************
3832 
3833  //**Default addition assignment to dense vectors (small matrices)*******************************
3847  template< typename VT1 // Type of the left-hand side target vector
3848  , typename VT2 // Type of the left-hand side vector operand
3849  , typename MT1 // Type of the right-hand side matrix operand
3850  , typename ST2 > // Type of the scalar value
3851  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3852  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3853  {
3854  selectDefaultAddAssignKernel( y, x, A, scalar );
3855  }
3856  //**********************************************************************************************
3857 
3858  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized small-matrix kernel for the addition assignment y += ( x * A ) * scalar.
//
// The columns of A are processed in groups of 8, 4, 3 and 2, followed by a single
// remaining column. For every column of a group one intrinsic accumulator collects
// x.load(i) * A.load(i,column) over the row range [ibegin,iend) in steps of IT::size;
// the horizontal sum of the accumulator times 'scalar' is then added to y.
//
// Row range: for lower matrix types the range starts at the first column of the
// group (one further for strictly lower types), masked with size_t(-IT::size), which
// rounds down to a multiple of IT::size (assuming IT::size is a power of two). For
// upper matrix types the range ends at the last column of the group (one earlier for
// strictly upper types). Otherwise all M rows are traversed.
//
// NOTE(review): the xmm accumulators are default-constructed and used without explicit
// initialization - presumably IntrinsicType default-constructs to zero; confirm.
3873  template< typename VT1 // Type of the left-hand side target vector
3874  , typename VT2 // Type of the left-hand side vector operand
3875  , typename MT1 // Type of the right-hand side matrix operand
3876  , typename ST2 > // Type of the scalar value
3877  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3878  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3879  {
3880  typedef IntrinsicTrait<ElementType> IT;
3881 
3882  const size_t M( A.rows() );
3883  const size_t N( A.columns() );
3884 
3885  size_t j( 0UL );
3886 
// Groups of eight columns
3887  for( ; (j+8UL) <= N; j+=8UL )
3888  {
3889  const size_t ibegin( ( IsLower<MT1>::value )
3890  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3891  :( 0UL ) );
3892  const size_t iend( ( IsUpper<MT1>::value )
3893  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
3894  :( M ) );
3895  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3896 
3897  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3898 
3899  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3900  const IntrinsicType x1( x.load(i) );
3901  xmm1 = xmm1 + x1 * A.load(i,j );
3902  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3903  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3904  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3905  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
3906  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
3907  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
3908  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
3909  }
3910 
3911  y[j ] += sum( xmm1 ) * scalar;
3912  y[j+1UL] += sum( xmm2 ) * scalar;
3913  y[j+2UL] += sum( xmm3 ) * scalar;
3914  y[j+3UL] += sum( xmm4 ) * scalar;
3915  y[j+4UL] += sum( xmm5 ) * scalar;
3916  y[j+5UL] += sum( xmm6 ) * scalar;
3917  y[j+6UL] += sum( xmm7 ) * scalar;
3918  y[j+7UL] += sum( xmm8 ) * scalar;
3919  }
3920 
// Groups of four columns
3921  for( ; (j+4UL) <= N; j+=4UL )
3922  {
3923  const size_t ibegin( ( IsLower<MT1>::value )
3924  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3925  :( 0UL ) );
3926  const size_t iend( ( IsUpper<MT1>::value )
3927  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
3928  :( M ) );
3929  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3930 
3931  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3932 
3933  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3934  const IntrinsicType x1( x.load(i) );
3935  xmm1 = xmm1 + x1 * A.load(i,j );
3936  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3937  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3938  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3939  }
3940 
3941  y[j ] += sum( xmm1 ) * scalar;
3942  y[j+1UL] += sum( xmm2 ) * scalar;
3943  y[j+2UL] += sum( xmm3 ) * scalar;
3944  y[j+3UL] += sum( xmm4 ) * scalar;
3945  }
3946 
// Groups of three columns
3947  for( ; (j+3UL) <= N; j+=3UL )
3948  {
3949  const size_t ibegin( ( IsLower<MT1>::value )
3950  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3951  :( 0UL ) );
3952  const size_t iend( ( IsUpper<MT1>::value )
3953  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
3954  :( M ) );
3955  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3956 
3957  IntrinsicType xmm1, xmm2, xmm3;
3958 
3959  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3960  const IntrinsicType x1( x.load(i) );
3961  xmm1 = xmm1 + x1 * A.load(i,j );
3962  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3963  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3964  }
3965 
3966  y[j ] += sum( xmm1 ) * scalar;
3967  y[j+1UL] += sum( xmm2 ) * scalar;
3968  y[j+2UL] += sum( xmm3 ) * scalar;
3969  }
3970 
// Groups of two columns
3971  for( ; (j+2UL) <= N; j+=2UL )
3972  {
3973  const size_t ibegin( ( IsLower<MT1>::value )
3974  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3975  :( 0UL ) );
3976  const size_t iend( ( IsUpper<MT1>::value )
3977  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
3978  :( M ) );
3979  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3980 
3981  IntrinsicType xmm1, xmm2;
3982 
3983  for( size_t i=ibegin; i<iend; i+=IT::size ) {
3984  const IntrinsicType x1( x.load(i) );
3985  xmm1 = xmm1 + x1 * A.load(i,j );
3986  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3987  }
3988 
3989  y[j ] += sum( xmm1 ) * scalar;
3990  y[j+1UL] += sum( xmm2 ) * scalar;
3991  }
3992 
// At most one column is left at this point
3993  if( j < N )
3994  {
3995  const size_t ibegin( ( IsLower<MT1>::value )
3996  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
3997  :( 0UL ) );
3998  const size_t iend( ( IsUpper<MT1>::value )
3999  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
4000  :( M ) );
4001  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4002 
4003  IntrinsicType xmm1;
4004 
4005  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4006  xmm1 = xmm1 + A.load(i,j) * x.load(i);
4007  }
4008 
4009  y[j] += sum( xmm1 ) * scalar;
4010  }
4011  }
4012  //**********************************************************************************************
4013 
4014  //**Default addition assignment to dense vectors (large matrices)*******************************
/*!\brief Non-vectorized fallback of the large-matrix addition assignment kernel.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected whenever UseVectorizedDefaultKernel is not satisfied for the given
// type combination. It contains no large-matrix specific code and merely relays the call to
// selectDefaultAddAssignKernel().
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   // No vectorization possible: defer to the default kernel.
   selectDefaultAddAssignKernel( y, x, A, scalar );
}
4037  //**********************************************************************************************
4038 
4039  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized large-matrix kernel for the addition assignment y += ( x * A ) * scalar.
//
// Parameters:
//   y      - target vector; y[j] is updated in place for every column index j of A.
//   x      - left-hand side vector operand, read through x.load(i).
//   A      - right-hand side matrix operand, read through A.load(i,j).
//   scalar - scaling factor applied to every reduced partial sum before it is added to y.
//
// Only enabled when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns of A are
// processed in groups of 8, 4, 2 and finally a single remaining column. For each group the
// row range [ibegin,iend) is traversed in steps of 4, 2 and 1 intrinsic vectors (IT::size
// elements each); every step reduces its products with sum() and adds the scaled result
// directly to y, i.e. no accumulator is carried across steps.
//
// NOTE(review): the last load of a row range may cover rows at or beyond iend; presumably the
// operands are padded accordingly - confirm against the UseVectorizedDefaultKernel conditions.
4054  template< typename VT1 // Type of the left-hand side target vector
4055  , typename VT2 // Type of the left-hand side vector operand
4056  , typename MT1 // Type of the right-hand side matrix operand
4057  , typename ST2 > // Type of the scalar value
4058  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4059  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4060  {
4061  typedef IntrinsicTrait<ElementType> IT;
4062 
4063  const size_t M( A.rows() );
4064  const size_t N( A.columns() );
4065 
4066  size_t j( 0UL );
4067 
// Groups of eight columns.
4068  for( ; (j+8UL) <= N; j+=8UL )
4069  {
// For IsLower matrices the row loop starts at the first column index of the group (one later
// for strictly lower matrices); the mask with -IT::size clears the low bits, which rounds the
// start down to a multiple of IT::size provided IT::size is a power of two.
4070  const size_t ibegin( ( IsLower<MT1>::value )
4071  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4072  :( 0UL ) );
// For IsUpper matrices the row loop ends right after the last column index of the group
// (one earlier for strictly upper matrices); otherwise all M rows are visited.
4073  const size_t iend( ( IsUpper<MT1>::value )
4074  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
4075  :( M ) );
4076  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4077 
4078  size_t i( ibegin );
4079 
// Four intrinsic vectors of x per iteration.
4080  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4081  const size_t i1( i+IT::size );
4082  const size_t i2( i+IT::size*2UL );
4083  const size_t i3( i+IT::size*3UL );
4084  const IntrinsicType x1( x.load(i ) );
4085  const IntrinsicType x2( x.load(i1) );
4086  const IntrinsicType x3( x.load(i2) );
4087  const IntrinsicType x4( x.load(i3) );
4088  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4089  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4090  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) ) * scalar;
4091  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) ) * scalar;
4092  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) ) * scalar;
4093  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) ) * scalar;
4094  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) ) * scalar;
4095  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) ) * scalar;
4096  }
4097 
// Two intrinsic vectors of x per iteration.
4098  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4099  const size_t i1( i+IT::size );
4100  const IntrinsicType x1( x.load(i ) );
4101  const IntrinsicType x2( x.load(i1) );
4102  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4103  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4104  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) ) * scalar;
4105  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) ) * scalar;
4106  y[j+4UL] += sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) ) * scalar;
4107  y[j+5UL] += sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) ) * scalar;
4108  y[j+6UL] += sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) ) * scalar;
4109  y[j+7UL] += sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) ) * scalar;
4110  }
4111 
// At most one intrinsic vector of x is left.
4112  if( i < iend ) {
4113  const IntrinsicType x1( x.load(i) );
4114  y[j ] += sum( x1 * A.load(i,j ) ) * scalar;
4115  y[j+1UL] += sum( x1 * A.load(i,j+1UL) ) * scalar;
4116  y[j+2UL] += sum( x1 * A.load(i,j+2UL) ) * scalar;
4117  y[j+3UL] += sum( x1 * A.load(i,j+3UL) ) * scalar;
4118  y[j+4UL] += sum( x1 * A.load(i,j+4UL) ) * scalar;
4119  y[j+5UL] += sum( x1 * A.load(i,j+5UL) ) * scalar;
4120  y[j+6UL] += sum( x1 * A.load(i,j+6UL) ) * scalar;
4121  y[j+7UL] += sum( x1 * A.load(i,j+7UL) ) * scalar;
4122  }
4123  }
4124 
// Groups of four columns (same row-loop structure as above).
4125  for( ; (j+4UL) <= N; j+=4UL )
4126  {
4127  const size_t ibegin( ( IsLower<MT1>::value )
4128  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4129  :( 0UL ) );
4130  const size_t iend( ( IsUpper<MT1>::value )
4131  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
4132  :( M ) );
4133  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4134 
4135  size_t i( ibegin );
4136 
4137  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4138  const size_t i1( i+IT::size );
4139  const size_t i2( i+IT::size*2UL );
4140  const size_t i3( i+IT::size*3UL );
4141  const IntrinsicType x1( x.load(i ) );
4142  const IntrinsicType x2( x.load(i1) );
4143  const IntrinsicType x3( x.load(i2) );
4144  const IntrinsicType x4( x.load(i3) );
4145  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4146  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4147  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) ) * scalar;
4148  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) ) * scalar;
4149  }
4150 
4151  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4152  const size_t i1( i+IT::size );
4153  const IntrinsicType x1( x.load(i ) );
4154  const IntrinsicType x2( x.load(i1) );
4155  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4156  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4157  y[j+2UL] += sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) ) * scalar;
4158  y[j+3UL] += sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) ) * scalar;
4159  }
4160 
4161  if( i < iend ) {
4162  const IntrinsicType x1( x.load(i) );
4163  y[j ] += sum( x1 * A.load(i,j ) ) * scalar;
4164  y[j+1UL] += sum( x1 * A.load(i,j+1UL) ) * scalar;
4165  y[j+2UL] += sum( x1 * A.load(i,j+2UL) ) * scalar;
4166  y[j+3UL] += sum( x1 * A.load(i,j+3UL) ) * scalar;
4167  }
4168  }
4169 
// Groups of two columns.
4170  for( ; (j+2UL) <= N; j+=2UL )
4171  {
4172  const size_t ibegin( ( IsLower<MT1>::value )
4173  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4174  :( 0UL ) );
4175  const size_t iend( ( IsUpper<MT1>::value )
4176  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
4177  :( M ) );
4178  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4179 
4180  size_t i( ibegin );
4181 
4182  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4183  const size_t i1( i+IT::size );
4184  const size_t i2( i+IT::size*2UL );
4185  const size_t i3( i+IT::size*3UL );
4186  const IntrinsicType x1( x.load(i ) );
4187  const IntrinsicType x2( x.load(i1) );
4188  const IntrinsicType x3( x.load(i2) );
4189  const IntrinsicType x4( x.load(i3) );
4190  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4191  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4192  }
4193 
4194  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4195  const size_t i1( i+IT::size );
4196  const IntrinsicType x1( x.load(i ) );
4197  const IntrinsicType x2( x.load(i1) );
4198  y[j ] += sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4199  y[j+1UL] += sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4200  }
4201 
4202  if( i < iend ) {
4203  const IntrinsicType x1( x.load(i) );
4204  y[j ] += sum( x1 * A.load(i,j ) ) * scalar;
4205  y[j+1UL] += sum( x1 * A.load(i,j+1UL) ) * scalar;
4206  }
4207  }
4208 
// Single remaining column (only reached when N is odd).
4209  if( j < N )
4210  {
4211  const size_t ibegin( ( IsLower<MT1>::value )
4212  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4213  :( 0UL ) );
4214  const size_t iend( ( IsUpper<MT1>::value )
4215  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
4216  :( M ) );
4217  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4218 
4219  size_t i( ibegin );
4220 
4221  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4222  const size_t i1( i+IT::size );
4223  const size_t i2( i+IT::size*2UL );
4224  const size_t i3( i+IT::size*3UL );
4225  const IntrinsicType x1( x.load(i ) );
4226  const IntrinsicType x2( x.load(i1) );
4227  const IntrinsicType x3( x.load(i2) );
4228  const IntrinsicType x4( x.load(i3) );
4229  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) ) * scalar;
4230  }
4231 
4232  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4233  const size_t i1( i+IT::size );
4234  const IntrinsicType x1( x.load(i ) );
4235  const IntrinsicType x2( x.load(i1) );
4236  y[j] += sum( x1 * A.load(i,j) + x2 * A.load(i1,j) ) * scalar;
4237  }
4238 
4239  if( i < iend ) {
4240  const IntrinsicType x1( x.load(i) );
4241  y[j] += sum( x1 * A.load(i,j) ) * scalar;
4242  }
4243  }
4244  }
4245  //**********************************************************************************************
4246 
4247  //**BLAS-based addition assignment to dense vectors (default)***********************************
/*!\brief Addition assignment kernel used when no BLAS routine applies.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected when UseDefaultKernel is satisfied for the given type combination.
// It performs no BLAS call itself but relays to selectLargeAddAssignKernel().
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   // No BLAS kernel for these types: use the large-matrix kernel instead.
   selectLargeAddAssignKernel( y, x, A, scalar );
}
4271  //**********************************************************************************************
4272 
4273  //**BLAS-based addition assignment to dense vectors (single precision)**************************
4274 #if BLAZE_BLAS_MODE
4275 
/*!\brief BLAS-based addition assignment kernel for single precision operands.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Selected when UseSinglePrecisionKernel is satisfied. Non-triangular matrices are handled by
// one sgemv() call. For triangular matrices the scaled vector is first copied into a temporary
// of the target's result type, multiplied via strmv(), and finally added to \a y.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   if( !IsTriangular<MT1>::value ) {
      // General matrix; the trailing 1.0F is presumably the factor applied to the existing
      // content of y (BLAS beta), so the product is accumulated rather than assigned.
      sgemv( y, x, A, scalar, 1.0F );
   }
   else {
      // Triangular matrix: the product is built in a temporary (presumably because the trmv
      // routine overwrites its vector argument) and then added to the target.
      typename VT1::ResultType buffer( scalar * x );
      strmv( buffer, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
      addAssign( y, buffer );
   }
}
4304 #endif
4305  //**********************************************************************************************
4306 
4307  //**BLAS-based addition assignment to dense vectors (double precision)**************************
4308 #if BLAZE_BLAS_MODE
4309 
/*!\brief BLAS-based addition assignment kernel for double precision operands.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Selected when UseDoublePrecisionKernel is satisfied. Non-triangular matrices are handled by
// one dgemv() call. For triangular matrices the scaled vector is first copied into a temporary
// of the target's result type, multiplied via dtrmv(), and finally added to \a y.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   if( !IsTriangular<MT1>::value ) {
      // General matrix; the trailing 1.0 is presumably the factor applied to the existing
      // content of y (BLAS beta), so the product is accumulated rather than assigned.
      dgemv( y, x, A, scalar, 1.0 );
   }
   else {
      // Triangular matrix: the product is built in a temporary (presumably because the trmv
      // routine overwrites its vector argument) and then added to the target.
      typename VT1::ResultType buffer( scalar * x );
      dtrmv( buffer, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
      addAssign( y, buffer );
   }
}
4338 #endif
4339  //**********************************************************************************************
4340 
4341  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
4342 #if BLAZE_BLAS_MODE
4343 
/*!\brief BLAS-based addition assignment kernel for single precision complex operands.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Selected when UseSinglePrecisionComplexKernel is satisfied. Non-triangular matrices are
// handled by one cgemv() call, for which the scalar is widened to a complex value with zero
// imaginary part. For triangular matrices the scaled vector is first copied into a temporary
// of the target's result type, multiplied via ctrmv(), and finally added to \a y.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   if( !IsTriangular<MT1>::value ) {
      // General matrix; 'beta' is presumably the factor applied to the existing content of y,
      // so a value of one accumulates the product instead of overwriting the target.
      const complex<float> alpha( scalar, 0.0F );
      const complex<float> beta ( 1.0F, 0.0F );
      cgemv( y, x, A, alpha, beta );
   }
   else {
      // Triangular matrix: the product is built in a temporary (presumably because the trmv
      // routine overwrites its vector argument) and then added to the target.
      typename VT1::ResultType buffer( scalar * x );
      ctrmv( buffer, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
      addAssign( y, buffer );
   }
}
4373 #endif
4374  //**********************************************************************************************
4375 
4376  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
4377 #if BLAZE_BLAS_MODE
4378 
/*!\brief BLAS-based addition assignment kernel for double precision complex operands.
//
// \param y The target vector the scaled product is added to.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// Selected when UseDoublePrecisionComplexKernel is satisfied. Non-triangular matrices are
// handled by one zgemv() call, for which the scalar is widened to a complex value with zero
// imaginary part. For triangular matrices the scaled vector is first copied into a temporary
// of the target's result type, multiplied via ztrmv(), and finally added to \a y.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   if( !IsTriangular<MT1>::value ) {
      // General matrix; 'beta' is presumably the factor applied to the existing content of y,
      // so a value of one accumulates the product instead of overwriting the target.
      const complex<double> alpha( scalar, 0.0 );
      const complex<double> beta ( 1.0, 0.0 );
      zgemv( y, x, A, alpha, beta );
   }
   else {
      // Triangular matrix: the product is built in a temporary (presumably because the trmv
      // routine overwrites its vector argument) and then added to the target.
      typename VT1::ResultType buffer( scalar * x );
      ztrmv( buffer, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
      addAssign( y, buffer );
   }
}
4408 #endif
4409  //**********************************************************************************************
4410 
4411  //**Addition assignment to sparse vectors*******************************************************
4412  // No special implementation for the addition assignment to sparse vectors.
4413  //**********************************************************************************************
4414 
4415  //**Subtraction assignment to dense vectors*****************************************************
/*!\brief Subtraction assignment of a scaled vector/matrix multiplication to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
// \return void
//
// The function extracts both operands of the wrapped multiplication expression, returns
// without touching \a lhs if the matrix operand has no rows or no columns, evaluates the
// operands serially into \c LT and \c RT objects, and then hands over to
// selectSubAssignKernel() together with the expression's scalar.
*/
template< typename VT1  // Type of the target dense vector
        , bool TF >     // Transpose flag of the target dense vector
friend inline void subAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
{
   BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );

   typename VMM::LeftOperand  left ( rhs.vector_.leftOperand()  );
   typename VMM::RightOperand right( rhs.vector_.rightOperand() );

   // An empty matrix operand contributes nothing to the target.
   const bool emptyMatrix( right.rows() == 0UL || right.columns() == 0UL );
   if( emptyMatrix ) {
      return;
   }

   // Serial evaluation of both operands.
   LT x( serial( left  ) );
   RT A( serial( right ) );

   BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
   BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
   BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
   BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );

   DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
}
4452  //**********************************************************************************************
4453 
4454  //**Subtraction assignment to dense vectors (kernel selection)**********************************
/*!\brief Kernel selection for the subtraction assignment of the scaled multiplication.
//
// \param y The target vector the scaled product is subtracted from.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// The small-matrix kernel is used if the matrix type is diagonal, if the matrix operand of the
// expression is a computation that is not evaluated, or if the number of matrix elements is
// below the threshold. In all other cases the BLAS kernel selection is used.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   // NOTE(review): the threshold is TDVECDMATMULT_THRESHOLD although this header is
   // TDVecTDMatMultExpr.h - confirm that this is the intended constant.
   const bool useSmallKernel( ( IsDiagonal<MT1>::value ) ||
                              ( IsComputation<MT>::value && !evaluateMatrix ) ||
                              ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) );

   if( useSmallKernel ) {
      selectSmallSubAssignKernel( y, x, A, scalar );
   }
   else {
      selectBlasSubAssignKernel( y, x, A, scalar );
   }
}
4478  //**********************************************************************************************
4479 
4480  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default subtraction assignment kernel of the scaled multiplication.
//
// \param y The target vector the scaled product is subtracted from.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// The kernel rebuilds the expression \c x*A*scalar from the given operands and passes it to
// the subAssign() member function of the target vector.
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
void selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   y.subAssign( x * A * scalar );
}
4502  //**********************************************************************************************
4503 
4504  //**Default subtraction assignment to dense vectors (small matrices)****************************
/*!\brief Non-vectorized fallback of the small-matrix subtraction assignment kernel.
//
// \param y The target vector the scaled product is subtracted from.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected whenever UseVectorizedDefaultKernel is not satisfied for the given
// type combination. It contains no small-matrix specific code and merely relays the call to
// selectDefaultSubAssignKernel().
*/
template< typename VT1    // Type of the left-hand side target vector
        , typename VT2    // Type of the left-hand side vector operand
        , typename MT1    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline
typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
{
   // No vectorization possible: defer to the default kernel.
   selectDefaultSubAssignKernel( y, x, A, scalar );
}
4527  //**********************************************************************************************
4528 
4529  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized small-matrix kernel for the subtraction assignment y -= ( x * A ) * scalar.
//
// Parameters:
//   y      - target vector; y[j] is updated in place for every column index j of A.
//   x      - left-hand side vector operand, read through x.load(i).
//   A      - right-hand side matrix operand, read through A.load(i,j).
//   scalar - scaling factor applied to each reduced column sum before it is subtracted from y.
//
// Only enabled when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns of A are
// processed in groups of 8, 4, 3, 2 and finally a single remaining column. Within a group one
// intrinsic accumulator per column collects x.load(i) * A.load(i,column) over the row range
// [ibegin,iend) in steps of IT::size; after the row loop each accumulator is reduced with
// sum(), scaled and subtracted from the matching element of y.
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel relies on
// IntrinsicType default-constructing to zero - confirm.
// NOTE(review): the last load of a row range may cover rows at or beyond iend; presumably the
// operands are padded accordingly - confirm against the UseVectorizedDefaultKernel conditions.
4544  template< typename VT1 // Type of the left-hand side target vector
4545  , typename VT2 // Type of the left-hand side vector operand
4546  , typename MT1 // Type of the right-hand side matrix operand
4547  , typename ST2 > // Type of the scalar value
4548  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4549  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4550  {
4551  typedef IntrinsicTrait<ElementType> IT;
4552 
4553  const size_t M( A.rows() );
4554  const size_t N( A.columns() );
4555 
4556  size_t j( 0UL );
4557 
// Groups of eight columns.
4558  for( ; (j+8UL) <= N; j+=8UL )
4559  {
// For IsLower matrices the row loop starts at the first column index of the group (one later
// for strictly lower matrices); the mask with -IT::size clears the low bits, which rounds the
// start down to a multiple of IT::size provided IT::size is a power of two.
4560  const size_t ibegin( ( IsLower<MT1>::value )
4561  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4562  :( 0UL ) );
// For IsUpper matrices the row loop ends right after the last column index of the group
// (one earlier for strictly upper matrices); otherwise all M rows are visited.
4563  const size_t iend( ( IsUpper<MT1>::value )
4564  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
4565  :( M ) );
4566  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4567 
4568  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4569 
4570  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4571  const IntrinsicType x1( x.load(i) );
4572  xmm1 = xmm1 + x1 * A.load(i,j );
4573  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
4574  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
4575  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
4576  xmm5 = xmm5 + x1 * A.load(i,j+4UL);
4577  xmm6 = xmm6 + x1 * A.load(i,j+5UL);
4578  xmm7 = xmm7 + x1 * A.load(i,j+6UL);
4579  xmm8 = xmm8 + x1 * A.load(i,j+7UL);
4580  }
4581 
// Horizontal reduction of each accumulator, then scaled subtraction from the target.
4582  y[j ] -= sum( xmm1 ) * scalar;
4583  y[j+1UL] -= sum( xmm2 ) * scalar;
4584  y[j+2UL] -= sum( xmm3 ) * scalar;
4585  y[j+3UL] -= sum( xmm4 ) * scalar;
4586  y[j+4UL] -= sum( xmm5 ) * scalar;
4587  y[j+5UL] -= sum( xmm6 ) * scalar;
4588  y[j+6UL] -= sum( xmm7 ) * scalar;
4589  y[j+7UL] -= sum( xmm8 ) * scalar;
4590  }
4591 
// Groups of four columns.
4592  for( ; (j+4UL) <= N; j+=4UL )
4593  {
4594  const size_t ibegin( ( IsLower<MT1>::value )
4595  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4596  :( 0UL ) );
4597  const size_t iend( ( IsUpper<MT1>::value )
4598  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
4599  :( M ) );
4600  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4601 
4602  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4603 
4604  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4605  const IntrinsicType x1( x.load(i) );
4606  xmm1 = xmm1 + x1 * A.load(i,j );
4607  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
4608  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
4609  xmm4 = xmm4 + x1 * A.load(i,j+3UL);
4610  }
4611 
4612  y[j ] -= sum( xmm1 ) * scalar;
4613  y[j+1UL] -= sum( xmm2 ) * scalar;
4614  y[j+2UL] -= sum( xmm3 ) * scalar;
4615  y[j+3UL] -= sum( xmm4 ) * scalar;
4616  }
4617 
// Groups of three columns (at most one such group can remain after the groups of four).
4618  for( ; (j+3UL) <= N; j+=3UL )
4619  {
4620  const size_t ibegin( ( IsLower<MT1>::value )
4621  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4622  :( 0UL ) );
4623  const size_t iend( ( IsUpper<MT1>::value )
4624  ?( IsStrictlyUpper<MT1>::value ? j+2UL : j+3UL )
4625  :( M ) );
4626  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4627 
4628  IntrinsicType xmm1, xmm2, xmm3;
4629 
4630  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4631  const IntrinsicType x1( x.load(i) );
4632  xmm1 = xmm1 + x1 * A.load(i,j );
4633  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
4634  xmm3 = xmm3 + x1 * A.load(i,j+2UL);
4635  }
4636 
4637  y[j ] -= sum( xmm1 ) * scalar;
4638  y[j+1UL] -= sum( xmm2 ) * scalar;
4639  y[j+2UL] -= sum( xmm3 ) * scalar;
4640  }
4641 
// Groups of two columns.
4642  for( ; (j+2UL) <= N; j+=2UL )
4643  {
4644  const size_t ibegin( ( IsLower<MT1>::value )
4645  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4646  :( 0UL ) );
4647  const size_t iend( ( IsUpper<MT1>::value )
4648  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
4649  :( M ) );
4650  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4651 
4652  IntrinsicType xmm1, xmm2;
4653 
4654  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4655  const IntrinsicType x1( x.load(i) );
4656  xmm1 = xmm1 + x1 * A.load(i,j );
4657  xmm2 = xmm2 + x1 * A.load(i,j+1UL);
4658  }
4659 
4660  y[j ] -= sum( xmm1 ) * scalar;
4661  y[j+1UL] -= sum( xmm2 ) * scalar;
4662  }
4663 
// Single remaining column.
4664  if( j < N )
4665  {
4666  const size_t ibegin( ( IsLower<MT1>::value )
4667  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4668  :( 0UL ) );
4669  const size_t iend( ( IsUpper<MT1>::value )
4670  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
4671  :( M ) );
4672  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4673 
4674  IntrinsicType xmm1;
4675 
4676  for( size_t i=ibegin; i<iend; i+=IT::size ) {
4677  xmm1 = xmm1 + A.load(i,j) * x.load(i);
4678  }
4679 
4680  y[j] -= sum( xmm1 ) * scalar;
4681  }
4682  }
4683  //**********************************************************************************************
4684 
4685  //**Default subtraction assignment to dense vectors (large matrices)****************************
4699  template< typename VT1 // Type of the left-hand side target vector
4700  , typename VT2 // Type of the left-hand side vector operand
4701  , typename MT1 // Type of the right-hand side matrix operand
4702  , typename ST2 > // Type of the scalar value
4703  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4704  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4705  {
4706  selectDefaultSubAssignKernel( y, x, A, scalar );
4707  }
4708  //**********************************************************************************************
4709 
4710  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized large-matrix kernel for the subtraction assignment y -= ( x * A ) * scalar.
//
// Parameters:
//   y      - target vector; y[j] is updated in place for every column index j of A.
//   x      - left-hand side vector operand, read through x.load(i).
//   A      - right-hand side matrix operand, read through A.load(i,j).
//   scalar - scaling factor applied to every reduced partial sum before it is subtracted.
//
// Only enabled when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds. The columns of A are
// processed in groups of 8, 4, 2 and finally a single remaining column. For each group the
// row range [ibegin,iend) is traversed in steps of 4, 2 and 1 intrinsic vectors (IT::size
// elements each); every step reduces its products with sum() and subtracts the scaled result
// directly from y, i.e. no accumulator is carried across steps.
//
// NOTE(review): the last load of a row range may cover rows at or beyond iend; presumably the
// operands are padded accordingly - confirm against the UseVectorizedDefaultKernel conditions.
4725  template< typename VT1 // Type of the left-hand side target vector
4726  , typename VT2 // Type of the left-hand side vector operand
4727  , typename MT1 // Type of the right-hand side matrix operand
4728  , typename ST2 > // Type of the scalar value
4729  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4730  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4731  {
4732  typedef IntrinsicTrait<ElementType> IT;
4733 
4734  const size_t M( A.rows() );
4735  const size_t N( A.columns() );
4736 
4737  size_t j( 0UL );
4738 
// Groups of eight columns.
4739  for( ; (j+8UL) <= N; j+=8UL )
4740  {
// For IsLower matrices the row loop starts at the first column index of the group (one later
// for strictly lower matrices); the mask with -IT::size clears the low bits, which rounds the
// start down to a multiple of IT::size provided IT::size is a power of two.
4741  const size_t ibegin( ( IsLower<MT1>::value )
4742  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4743  :( 0UL ) );
// For IsUpper matrices the row loop ends right after the last column index of the group
// (one earlier for strictly upper matrices); otherwise all M rows are visited.
4744  const size_t iend( ( IsUpper<MT1>::value )
4745  ?( IsStrictlyUpper<MT1>::value ? j+7UL : j+8UL )
4746  :( M ) );
4747  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4748 
4749  size_t i( ibegin );
4750 
// Four intrinsic vectors of x per iteration.
4751  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4752  const size_t i1( i+IT::size );
4753  const size_t i2( i+IT::size*2UL );
4754  const size_t i3( i+IT::size*3UL );
4755  const IntrinsicType x1( x.load(i ) );
4756  const IntrinsicType x2( x.load(i1) );
4757  const IntrinsicType x3( x.load(i2) );
4758  const IntrinsicType x4( x.load(i3) );
4759  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4760  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4761  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) ) * scalar;
4762  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) ) * scalar;
4763  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) + x3 * A.load(i2,j+4UL) + x4 * A.load(i3,j+4UL) ) * scalar;
4764  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) + x3 * A.load(i2,j+5UL) + x4 * A.load(i3,j+5UL) ) * scalar;
4765  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) + x3 * A.load(i2,j+6UL) + x4 * A.load(i3,j+6UL) ) * scalar;
4766  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) + x3 * A.load(i2,j+7UL) + x4 * A.load(i3,j+7UL) ) * scalar;
4767  }
4768 
// Two intrinsic vectors of x per iteration.
4769  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4770  const size_t i1( i+IT::size );
4771  const IntrinsicType x1( x.load(i ) );
4772  const IntrinsicType x2( x.load(i1) );
4773  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4774  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4775  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) ) * scalar;
4776  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) ) * scalar;
4777  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) + x2 * A.load(i1,j+4UL) ) * scalar;
4778  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) + x2 * A.load(i1,j+5UL) ) * scalar;
4779  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) + x2 * A.load(i1,j+6UL) ) * scalar;
4780  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) + x2 * A.load(i1,j+7UL) ) * scalar;
4781  }
4782 
// At most one intrinsic vector of x is left.
4783  if( i < iend ) {
4784  const IntrinsicType x1( x.load(i) );
4785  y[j ] -= sum( x1 * A.load(i,j ) ) * scalar;
4786  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) ) * scalar;
4787  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) ) * scalar;
4788  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) ) * scalar;
4789  y[j+4UL] -= sum( x1 * A.load(i,j+4UL) ) * scalar;
4790  y[j+5UL] -= sum( x1 * A.load(i,j+5UL) ) * scalar;
4791  y[j+6UL] -= sum( x1 * A.load(i,j+6UL) ) * scalar;
4792  y[j+7UL] -= sum( x1 * A.load(i,j+7UL) ) * scalar;
4793  }
4794  }
4795 
// Groups of four columns (same row-loop structure as above).
4796  for( ; (j+4UL) <= N; j+=4UL )
4797  {
4798  const size_t ibegin( ( IsLower<MT1>::value )
4799  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4800  :( 0UL ) );
4801  const size_t iend( ( IsUpper<MT1>::value )
4802  ?( IsStrictlyUpper<MT1>::value ? j+3UL : j+4UL )
4803  :( M ) );
4804  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4805 
4806  size_t i( ibegin );
4807 
4808  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4809  const size_t i1( i+IT::size );
4810  const size_t i2( i+IT::size*2UL );
4811  const size_t i3( i+IT::size*3UL );
4812  const IntrinsicType x1( x.load(i ) );
4813  const IntrinsicType x2( x.load(i1) );
4814  const IntrinsicType x3( x.load(i2) );
4815  const IntrinsicType x4( x.load(i3) );
4816  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4817  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4818  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) + x3 * A.load(i2,j+2UL) + x4 * A.load(i3,j+2UL) ) * scalar;
4819  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) + x3 * A.load(i2,j+3UL) + x4 * A.load(i3,j+3UL) ) * scalar;
4820  }
4821 
4822  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4823  const size_t i1( i+IT::size );
4824  const IntrinsicType x1( x.load(i ) );
4825  const IntrinsicType x2( x.load(i1) );
4826  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4827  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4828  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) + x2 * A.load(i1,j+2UL) ) * scalar;
4829  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) + x2 * A.load(i1,j+3UL) ) * scalar;
4830  }
4831 
4832  if( i < iend ) {
4833  const IntrinsicType x1( x.load(i) );
4834  y[j ] -= sum( x1 * A.load(i,j ) ) * scalar;
4835  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) ) * scalar;
4836  y[j+2UL] -= sum( x1 * A.load(i,j+2UL) ) * scalar;
4837  y[j+3UL] -= sum( x1 * A.load(i,j+3UL) ) * scalar;
4838  }
4839  }
4840 
// Groups of two columns.
4841  for( ; (j+2UL) <= N; j+=2UL )
4842  {
4843  const size_t ibegin( ( IsLower<MT1>::value )
4844  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4845  :( 0UL ) );
4846  const size_t iend( ( IsUpper<MT1>::value )
4847  ?( IsStrictlyUpper<MT1>::value ? j+1UL : j+2UL )
4848  :( M ) );
4849  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4850 
4851  size_t i( ibegin );
4852 
4853  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4854  const size_t i1( i+IT::size );
4855  const size_t i2( i+IT::size*2UL );
4856  const size_t i3( i+IT::size*3UL );
4857  const IntrinsicType x1( x.load(i ) );
4858  const IntrinsicType x2( x.load(i1) );
4859  const IntrinsicType x3( x.load(i2) );
4860  const IntrinsicType x4( x.load(i3) );
4861  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) + x3 * A.load(i2,j ) + x4 * A.load(i3,j ) ) * scalar;
4862  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) + x3 * A.load(i2,j+1UL) + x4 * A.load(i3,j+1UL) ) * scalar;
4863  }
4864 
4865  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4866  const size_t i1( i+IT::size );
4867  const IntrinsicType x1( x.load(i ) );
4868  const IntrinsicType x2( x.load(i1) );
4869  y[j ] -= sum( x1 * A.load(i,j ) + x2 * A.load(i1,j ) ) * scalar;
4870  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) + x2 * A.load(i1,j+1UL) ) * scalar;
4871  }
4872 
4873  if( i < iend ) {
4874  const IntrinsicType x1( x.load(i) );
4875  y[j ] -= sum( x1 * A.load(i,j ) ) * scalar;
4876  y[j+1UL] -= sum( x1 * A.load(i,j+1UL) ) * scalar;
4877  }
4878  }
4879 
// Single remaining column (only reached when N is odd).
4880  if( j < N )
4881  {
4882  const size_t ibegin( ( IsLower<MT1>::value )
4883  ?( ( IsStrictlyLower<MT1>::value ? j+1UL : j ) & size_t(-IT::size) )
4884  :( 0UL ) );
4885  const size_t iend( ( IsUpper<MT1>::value )
4886  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
4887  :( M ) );
4888  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4889 
4890  size_t i( ibegin );
4891 
4892  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL ) {
4893  const size_t i1( i+IT::size );
4894  const size_t i2( i+IT::size*2UL );
4895  const size_t i3( i+IT::size*3UL );
4896  const IntrinsicType x1( x.load(i ) );
4897  const IntrinsicType x2( x.load(i1) );
4898  const IntrinsicType x3( x.load(i2) );
4899  const IntrinsicType x4( x.load(i3) );
4900  y[j] -= sum( x1 * A.load(i,j) + x2 * A.load(i1,j) + x3 * A.load(i2,j) + x4 * A.load(i3,j) ) * scalar;
4901  }
4902 
4903  for( ; (i+IT::size) < iend; i+=IT::size*2UL ) {
4904  const size_t i1( i+IT::size );
4905  const IntrinsicType x1( x.load(i ) );
4906  const IntrinsicType x2( x.load(i1) );
4907  y[j] -= sum( x1 * A.load(i,j) + x2 * A.load(i1,j) ) * scalar;
4908  }
4909 
4910  if( i < iend ) {
4911  const IntrinsicType x1( x.load(i) );
4912  y[j] -= sum( x1 * A.load(i,j) ) * scalar;
4913  }
4914  }
4915  }
4916  //**********************************************************************************************
4917 
4918  //**BLAS-based subtraction assignment to dense vectors (default)********************************
/*!\brief Default variant of the BLAS-based subtraction assignment kernel.
//
// \param y The left-hand side target vector.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload is selected through EnableIf when UseDefaultKernel<VT1,VT2,MT1,ST2> holds.
// It performs no BLAS call itself; all four arguments are forwarded unchanged to
// selectLargeSubAssignKernel().
*/
4933  template< typename VT1 // Type of the left-hand side target vector
4934  , typename VT2 // Type of the left-hand side vector operand
4935  , typename MT1 // Type of the right-hand side matrix operand
4936  , typename ST2 > // Type of the scalar value
4937  static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4938  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4939  {
4940  selectLargeSubAssignKernel( y, x, A, scalar );
4941  }
4942  //**********************************************************************************************
4943 
4944  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
4945 #if BLAZE_BLAS_MODE
4946 
/*!\brief BLAS-based subtraction assignment kernel for single precision operands.
//
// \param y The left-hand side target vector.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload is selected through EnableIf when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>
// holds and is only compiled when BLAZE_BLAS_MODE is non-zero. Triangular matrices are
// handled via strmv() on a scaled temporary, all other matrices via a single sgemv() call.
*/
4959  template< typename VT1 // Type of the left-hand side target vector
4960  , typename VT2 // Type of the left-hand side vector operand
4961  , typename MT1 // Type of the right-hand side matrix operand
4962  , typename ST2 > // Type of the scalar value
4963  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4964  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4965  {
4966  if( IsTriangular<MT1>::value ) {
 // Triangular case: the scaled vector is stored in a temporary that strmv() receives
 // together with A and the lower/upper flag; the temporary is then subtracted from y.
4967  typename VT1::ResultType tmp( scalar * x );
4968  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4969  subAssign( y, tmp );
4970  }
4971  else {
 // General case: the negated scalar and a factor of 1.0F are passed to sgemv().
 // NOTE(review): presumably this yields y = 1.0F*y - scalar*(x*A) per the usual gemv
 // convention -- confirm against the wrapper in blaze/math/blas/Level2.h.
4972  sgemv( y, x, A, -scalar, 1.0F );
4973  }
4974  }
4975 #endif
4976  //**********************************************************************************************
4977 
4978  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
4979 #if BLAZE_BLAS_MODE
4980 
/*!\brief BLAS-based subtraction assignment kernel for double precision operands.
//
// \param y The left-hand side target vector.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload is selected through EnableIf when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>
// holds and is only compiled when BLAZE_BLAS_MODE is non-zero. Triangular matrices are
// handled via dtrmv() on a scaled temporary, all other matrices via a single dgemv() call.
*/
4993  template< typename VT1 // Type of the left-hand side target vector
4994  , typename VT2 // Type of the left-hand side vector operand
4995  , typename MT1 // Type of the right-hand side matrix operand
4996  , typename ST2 > // Type of the scalar value
4997  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4998  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4999  {
5000  if( IsTriangular<MT1>::value ) {
 // Triangular case: the scaled vector is stored in a temporary that dtrmv() receives
 // together with A and the lower/upper flag; the temporary is then subtracted from y.
5001  typename VT1::ResultType tmp( scalar * x );
5002  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
5003  subAssign( y, tmp );
5004  }
5005  else {
 // General case: the negated scalar and a factor of 1.0 are passed to dgemv().
 // NOTE(review): presumably this yields y = 1.0*y - scalar*(x*A) per the usual gemv
 // convention -- confirm against the wrapper in blaze/math/blas/Level2.h.
5006  dgemv( y, x, A, -scalar, 1.0 );
5007  }
5008  }
5009 #endif
5010  //**********************************************************************************************
5011 
5012  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
5013 #if BLAZE_BLAS_MODE
5014 
/*!\brief BLAS-based subtraction assignment kernel for single precision complex operands.
//
// \param y The left-hand side target vector.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload is selected through EnableIf when UseSinglePrecisionComplexKernel<VT1,VT2,MT1>
// holds (the scalar type ST2 is not part of that condition) and is only compiled when
// BLAZE_BLAS_MODE is non-zero. Triangular matrices are handled via ctrmv() on a scaled
// temporary, all other matrices via a single cgemv() call.
*/
5029  template< typename VT1 // Type of the left-hand side target vector
5030  , typename VT2 // Type of the left-hand side vector operand
5031  , typename MT1 // Type of the right-hand side matrix operand
5032  , typename ST2 > // Type of the scalar value
5033  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
5034  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
5035  {
5036  if( IsTriangular<MT1>::value ) {
 // Triangular case: the scaled vector is stored in a temporary that ctrmv() receives
 // together with A and the lower/upper flag; the temporary is then subtracted from y.
5037  typename VT1::ResultType tmp( scalar * x );
5038  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
5039  subAssign( y, tmp );
5040  }
5041  else {
 // General case: the negated scalar becomes the real part of a complex<float> with zero
 // imaginary part; the second factor is the complex value (1,0).
 // NOTE(review): this construction looks like it needs ST2 to be convertible to float --
 // confirm that complex-valued scalars are never routed to this overload.
5042  cgemv( y, x, A, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
5043  }
5044  }
5045 #endif
5046  //**********************************************************************************************
5047 
5048  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
5049 #if BLAZE_BLAS_MODE
5050 
/*!\brief BLAS-based subtraction assignment kernel for double precision complex operands.
//
// \param y The left-hand side target vector.
// \param x The left-hand side vector operand.
// \param A The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload is selected through EnableIf when UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
// holds (the scalar type ST2 is not part of that condition) and is only compiled when
// BLAZE_BLAS_MODE is non-zero. Triangular matrices are handled via ztrmv() on a scaled
// temporary, all other matrices via a single zgemv() call.
*/
5065  template< typename VT1 // Type of the left-hand side target vector
5066  , typename VT2 // Type of the left-hand side vector operand
5067  , typename MT1 // Type of the right-hand side matrix operand
5068  , typename ST2 > // Type of the scalar value
5069  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
5070  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
5071  {
5072  if( IsTriangular<MT1>::value ) {
 // Triangular case: the scaled vector is stored in a temporary that ztrmv() receives
 // together with A and the lower/upper flag; the temporary is then subtracted from y.
5073  typename VT1::ResultType tmp( scalar * x );
5074  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
5075  subAssign( y, tmp );
5076  }
5077  else {
 // General case: the negated scalar becomes the real part of a complex<double> with zero
 // imaginary part; the second factor is the complex value (1,0).
 // NOTE(review): this construction looks like it needs ST2 to be convertible to double --
 // confirm that complex-valued scalars are never routed to this overload.
5078  zgemv( y, x, A, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
5079  }
5080  }
5081 #endif
5082  //**********************************************************************************************
5083 
5084  //**Subtraction assignment to sparse vectors****************************************************
5085  // No special implementation for the subtraction assignment to sparse vectors.
5086  //**********************************************************************************************
5087 
5088  //**Multiplication assignment to dense vectors**************************************************
/*!\brief Multiplication assignment of a scaled vector/matrix product to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression.
//
// The expression is not applied in place: it is first evaluated into a temporary of type
// ResultType, and that temporary is then passed on to multAssign() together with the
// target vector.
*/
5100  template< typename VT1 // Type of the target dense vector
5101  , bool TF > // Transpose flag of the target dense vector
5102  friend inline void multAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5103  {
5105 
5109 
5110  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5111 
 // serial() forces the serial evaluation of the expression into the temporary.
5112  const ResultType tmp( serial( rhs ) );
5113  multAssign( ~lhs, tmp );
5114  }
5115  //**********************************************************************************************
5116 
5117  //**Multiplication assignment to sparse vectors*************************************************
5118  // No special implementation for the multiplication assignment to sparse vectors.
5119  //**********************************************************************************************
5120 
5121  //**SMP assignment to dense vectors*************************************************************
/*!\brief SMP assignment of a scaled vector/matrix product to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression.
//
// This overload is selected through EnableIf when UseSMPAssign<VT1> holds. Both operands of
// the wrapped multiplication are evaluated into the local types LT and RT, and the product
// of these evaluated operands and the scalar is handed to smpAssign() again.
// NOTE(review): VMM is declared outside this excerpt; it is presumably the type of the
// wrapped vector/matrix multiplication expression -- confirm.
*/
5135  template< typename VT1 // Type of the target dense vector
5136  , bool TF > // Transpose flag of the target dense vector
5137  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5138  smpAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5139  {
5141 
5142  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5143 
5144  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
5145  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
5146 
 // Degenerate shapes: a matrix without rows resets the target, a matrix without columns
 // leaves the target untouched. In both cases no multiplication is performed.
5147  if( right.rows() == 0UL ) {
5148  reset( ~lhs );
5149  return;
5150  }
5151  else if( right.columns() == 0UL ) {
5152  return;
5153  }
5154 
5155  LT x( left ); // Evaluation of the left-hand side dense vector operand
5156  RT A( right ); // Evaluation of the right-hand side dense matrix operand
5157 
5158  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
5159  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
5160  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
5161  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
5162 
 // The expression is rebuilt from the evaluated operands before the SMP assignment.
5163  smpAssign( ~lhs, x * A * rhs.scalar_ );
5164  }
5165  //**********************************************************************************************
5166 
5167  //**SMP assignment to sparse vectors************************************************************
/*!\brief SMP assignment of a scaled vector/matrix product to a sparse vector.
//
// \param lhs The target left-hand side sparse vector.
// \param rhs The right-hand side scaled multiplication expression.
//
// This overload is selected through EnableIf when UseSMPAssign<VT1> holds. The expression
// is evaluated into a temporary of type ResultType, which is then SMP-assigned to the
// sparse target.
*/
5181  template< typename VT1 // Type of the target sparse vector
5182  , bool TF > // Transpose flag of the target sparse vector
5183  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5184  smpAssign( SparseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5185  {
5187 
5191 
5192  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5193 
5194  const ResultType tmp( rhs );
5195  smpAssign( ~lhs, tmp );
5196  }
5197  //**********************************************************************************************
5198 
5199  //**SMP addition assignment to dense vectors****************************************************
/*!\brief SMP addition assignment of a scaled vector/matrix product to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be added.
//
// This overload is selected through EnableIf when UseSMPAssign<VT1> holds. Both operands of
// the wrapped multiplication are evaluated into the local types LT and RT, and the product
// of these evaluated operands and the scalar is handed to smpAddAssign() again.
*/
5213  template< typename VT1 // Type of the target dense vector
5214  , bool TF > // Transpose flag of the target dense vector
5215  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5216  smpAddAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5217  {
5219 
5220  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5221 
5222  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
5223  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
5224 
 // A matrix without rows or without columns leaves the target untouched.
5225  if( right.rows() == 0UL || right.columns() == 0UL ) {
5226  return;
5227  }
5228 
5229  LT x( left ); // Evaluation of the left-hand side dense vector operand
5230  RT A( right ); // Evaluation of the right-hand side dense matrix operand
5231 
5232  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
5233  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
5234  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
5235  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
5236 
 // The expression is rebuilt from the evaluated operands before the SMP addition assignment.
5237  smpAddAssign( ~lhs, x * A * rhs.scalar_ );
5238  }
5239  //**********************************************************************************************
5240 
5241  //**SMP addition assignment to sparse vectors***************************************************
5242  // No special implementation for the SMP addition assignment to sparse vectors.
5243  //**********************************************************************************************
5244 
5245  //**SMP subtraction assignment to dense vectors*************************************************
/*!\brief SMP subtraction assignment of a scaled vector/matrix product to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
//
// This overload is selected through EnableIf when UseSMPAssign<VT1> holds. Both operands of
// the wrapped multiplication are evaluated into the local types LT and RT, and the product
// of these evaluated operands and the scalar is handed to smpSubAssign() again.
*/
5259  template< typename VT1 // Type of the target dense vector
5260  , bool TF > // Transpose flag of the target dense vector
5261  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5262  smpSubAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5263  {
5265 
5266  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5267 
5268  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
5269  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
5270 
 // A matrix without rows or without columns leaves the target untouched.
5271  if( right.rows() == 0UL || right.columns() == 0UL ) {
5272  return;
5273  }
5274 
5275  LT x( left ); // Evaluation of the left-hand side dense vector operand
5276  RT A( right ); // Evaluation of the right-hand side dense matrix operand
5277 
5278  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
5279  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
5280  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
5281  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
5282 
 // The expression is rebuilt from the evaluated operands before the SMP subtraction assignment.
5283  smpSubAssign( ~lhs, x * A * rhs.scalar_ );
5284  }
5285  //**********************************************************************************************
5286 
5287  //**SMP subtraction assignment to sparse vectors************************************************
5288  // No special implementation for the SMP subtraction assignment to sparse vectors.
5289  //**********************************************************************************************
5290 
5291  //**SMP multiplication assignment to dense vectors**********************************************
/*!\brief SMP multiplication assignment of a scaled vector/matrix product to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression.
//
// This overload is selected through EnableIf when UseSMPAssign<VT1> holds. The expression
// is evaluated into a temporary of type ResultType, which is then passed on to
// smpMultAssign() together with the target vector.
*/
5305  template< typename VT1 // Type of the target dense vector
5306  , bool TF > // Transpose flag of the target dense vector
5307  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5308  smpMultAssign( DenseVector<VT1,TF>& lhs, const DVecScalarMultExpr& rhs )
5309  {
5311 
5315 
5316  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5317 
5318  const ResultType tmp( rhs );
5319  smpMultAssign( ~lhs, tmp );
5320  }
5321  //**********************************************************************************************
5322 
5323  //**SMP multiplication assignment to sparse vectors*********************************************
5324  // No special implementation for the SMP multiplication assignment to sparse vectors.
5325  //**********************************************************************************************
5326 
5327  //**Compile time checks*************************************************************************
 // Compile-time constraint: compilation fails unless ST and RightOperand are the same type.
5335  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
5336  //**********************************************************************************************
5337 };
5339 //*************************************************************************************************
5340 
5341 
5342 
5343 
5344 //=================================================================================================
5345 //
5346 // GLOBAL BINARY ARITHMETIC OPERATORS
5347 //
5348 //=================================================================================================
5349 
5350 //*************************************************************************************************
/*!\brief Multiplication operator for the multiplication of a transpose dense vector and a
//        column-major dense matrix.
//
// \param vec The left-hand side transpose dense vector for the multiplication.
// \param mat The right-hand side column-major dense matrix for the multiplication.
// \return The expression object representing the product of \a vec and \a mat.
// \exception std::invalid_argument Vector and matrix sizes do not match.
//
// The operator does not compute anything itself: after the size check it returns a
// TDVecTDMatMultExpr<T1,T2> expression object built from the two operands. Through DisableIf
// the operator is removed from overload resolution when T2 is a matrix/matrix
// multiplication expression.
*/
5381 template< typename T1 // Type of the left-hand side dense vector
5382  , typename T2 > // Type of the right-hand side dense matrix
5383 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
5384  operator*( const DenseVector<T1,true>& vec, const DenseMatrix<T2,true>& mat )
5385 {
5387 
 // The vector must provide exactly one element per matrix row.
5388  if( (~vec).size() != (~mat).rows() )
5389  throw std::invalid_argument( "Vector and matrix sizes do not match" );
5390 
5391  return TDVecTDMatMultExpr<T1,T2>( ~vec, ~mat );
5392 }
5393 //*************************************************************************************************
5394 
5395 
5396 
5397 
5398 //=================================================================================================
5399 //
5400 // SIZE SPECIALIZATIONS
5401 //
5402 //=================================================================================================
5403 
5404 //*************************************************************************************************
/*!\brief Specialization of the Size type trait for TDVecTDMatMultExpr.
//
// The size of the expression is taken from Columns<MT>, i.e. from the column trait of the
// right-hand side matrix operand. The template parameters are declared in the same order as
// the arguments of TDVecTDMatMultExpr<VT,MT> (vector type first, matrix type second), so
// that Columns is evaluated on the matrix type and not on the vector type.
*/
5406 template< typename VT, typename MT >
5407 struct Size< TDVecTDMatMultExpr<VT,MT> >
5408  : public Columns<MT>
5409 {};
5411 //*************************************************************************************************
5412 
5413 
5414 
5415 
5416 //=================================================================================================
5417 //
5418 // EXPRESSION TRAIT SPECIALIZATIONS
5419 //
5420 //=================================================================================================
5421 
5422 //*************************************************************************************************
/*!\brief Specialization of the SubvectorExprTrait class template for TDVecTDMatMultExpr.
//
// The nested type \a Type is the MultExprTrait result for two operand types: the
// SubvectorExprTrait type of <tt>const VT</tt> and the SubmatrixExprTrait type of
// <tt>const MT</tt>, both instantiated with the same flag \a AF.
// NOTE(review): AF is presumably the alignment flag of the subvector/submatrix views --
// confirm against the SubvectorExprTrait primary template.
*/
5424 template< typename VT, typename MT, bool AF >
5425 struct SubvectorExprTrait< TDVecTDMatMultExpr<VT,MT>, AF >
5426 {
5427  public:
5428  //**********************************************************************************************
5429  typedef typename MultExprTrait< typename SubvectorExprTrait<const VT,AF>::Type
5430  , typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
5431  //**********************************************************************************************
5432 };
5434 //*************************************************************************************************
5435 
5436 } // namespace blaze
5437 
5438 #endif
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecTDMatMultExpr.h:397
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Expression object for transpose dense vector-transpose dense matrix multiplications. The TDVecTDMatMultExpr class represents the compile time expression for multiplications between transpose dense vectors and column-major dense matrices.
Definition: Forward.h:135
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:8247
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDVecTDMatMultExpr.h:385
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:280
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
const size_t TDVECTDMATMULT_THRESHOLD
Dense Vector/column-major dense matrix multiplication threshold.This setting specifies the threshold ...
Definition: Thresholds.h:108
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecTDMatMultExpr.h:277
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecTDMatMultExpr.h:365
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:439
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:276
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecTDMatMultExpr.h:289
TDVecTDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecTDMatMultExpr class.
Definition: TDVecTDMatMultExpr.h:311
Header file for the VecScalarMultExpr base class.
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:118
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:121
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:286
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecTDMatMultExpr.h:325
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:273
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecTDMatMultExpr.h:275
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecTDMatMultExpr.h:429
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
TDVecTDMatMultExpr< VT, MT > This
Type of this TDVecTDMatMultExpr instance.
Definition: TDVecTDMatMultExpr.h:271
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Constraint on the data type.
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:283
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:120
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecTDMatMultExpr.h:274
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:119
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:150
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:440
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Constraint on the data type.
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
Header file for the HasMutableDataAccess type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecTDMatMultExpr.h:419
Header file for all intrinsic functionality.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:272
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:122
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the IsUpper type trait.
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:123
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecTDMatMultExpr.h:409
const size_t SMP_TDVECTDMATMULT_THRESHOLD
SMP dense vector/column-major dense matrix multiplication threshold.This threshold specifies when a d...
Definition: Thresholds.h:391
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:375
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849