DMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level2.h>
56 #include <blaze/math/Intrinsics.h>
57 #include <blaze/math/shims/Reset.h>
78 #include <blaze/system/BLAS.h>
80 #include <blaze/util/Assert.h>
81 #include <blaze/util/Complex.h>
84 #include <blaze/util/DisableIf.h>
85 #include <blaze/util/EnableIf.h>
87 #include <blaze/util/SelectType.h>
88 #include <blaze/util/Types.h>
94 
95 
96 namespace blaze {
97 
98 //=================================================================================================
99 //
100 // CLASS DMATDVECMULTEXPR
101 //
102 //=================================================================================================
103 
104 //*************************************************************************************************
111 template< typename MT // Type of the left-hand side dense matrix
112  , typename VT > // Type of the right-hand side dense vector
113 class DMatDVecMultExpr : public DenseVector< DMatDVecMultExpr<MT,VT>, false >
114  , private MatVecMultExpr
115  , private Computation
116 {
117  private:
118  //**Type definitions****************************************************************************
// Private shorthand aliases for the result, element and composite types of both operands.
119  typedef typename MT::ResultType MRT;  // Result type of the matrix operand MT
120  typedef typename VT::ResultType VRT;  // Result type of the vector operand VT
121  typedef typename MRT::ElementType MET;  // Element type of the matrix result type MRT
122  typedef typename VRT::ElementType VET;  // Element type of the vector result type VRT
123  typedef typename MT::CompositeType MCT;  // Composite type of the matrix operand MT
124  typedef typename VT::CompositeType VCT;  // Composite type of the vector operand VT
125  //**********************************************************************************************
126 
127  //**********************************************************************************************
// Compile-time flag for the matrix operand. It is consulted by UseSMPAssign, smpAssignable,
// canSMPAssign() and selectAssignKernel().
// NOTE(review): the continuation of this expression (source line 130) is missing from this
// listing; only the first two terms of the condition are visible here.
129  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
131  //**********************************************************************************************
132 
133  //**********************************************************************************************
135  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
144  template< typename T1 >
145  struct UseSMPAssign {
146  enum { value = ( evaluateMatrix || evaluateVector ) };
147  };
149  //**********************************************************************************************
150 
151  //**********************************************************************************************
153 
// Compile-time switch that enables the single precision BLAS overload of
// selectBlasAssignKernel() (the one calling strmv/sgemv). There it is instantiated with the
// target vector type (T1), the matrix operand type (T2) and the vector operand type (T3).
// NOTE(review): source lines 160-163 and 165-167 of the condition are missing from this
// listing; only the BLAZE_BLAS_MODE and 'vectorizable' terms are visible.
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = BLAZE_BLAS_MODE &&
164  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
// Compile-time switch that enables the double precision BLAS overload of
// selectBlasAssignKernel() (the one calling dtrmv/dgemv). There it is instantiated with the
// target vector type (T1), the matrix operand type (T2) and the vector operand type (T3).
// NOTE(review): source lines 181-184 and 186-188 of the condition are missing from this
// listing; only the BLAZE_BLAS_MODE and 'vectorizable' terms are visible.
178  template< typename T1, typename T2, typename T3 >
179  struct UseDoublePrecisionKernel {
180  enum { value = BLAZE_BLAS_MODE &&
185  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
// Compile-time switch that enables the single precision complex BLAS overload of
// selectBlasAssignKernel() (the one calling ctrmv/cgemv). There it is instantiated with the
// target vector type (T1), the matrix operand type (T2) and the vector operand type (T3).
// NOTE(review): source lines 203-206 and 208-210 of the condition are missing from this
// listing; only the BLAZE_BLAS_MODE and 'vectorizable' terms are visible.
199  template< typename T1, typename T2, typename T3 >
200  struct UseSinglePrecisionComplexKernel {
201  typedef complex<float> Type;  // Element type this kernel is associated with
202  enum { value = BLAZE_BLAS_MODE &&
207  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
211  };
213  //**********************************************************************************************
214 
215  //**********************************************************************************************
217 
// Compile-time switch that enables the double precision complex BLAS overload of
// selectBlasAssignKernel() (the one calling ztrmv/zgemv). There it is instantiated with the
// target vector type (T1), the matrix operand type (T2) and the vector operand type (T3).
// NOTE(review): source lines 225-228 and 230-232 of the condition are missing from this
// listing; only the BLAZE_BLAS_MODE and 'vectorizable' terms are visible.
221  template< typename T1, typename T2, typename T3 >
222  struct UseDoublePrecisionComplexKernel {
223  typedef complex<double> Type;  // Element type this kernel is associated with
224  enum { value = BLAZE_BLAS_MODE &&
229  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
233  };
235  //**********************************************************************************************
236 
237  //**********************************************************************************************
239 
242  template< typename T1, typename T2, typename T3 >
243  struct UseDefaultKernel {
244  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
245  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
246  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
247  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
248  };
250  //**********************************************************************************************
251 
252  //**********************************************************************************************
254 
// Compile-time switch between the scalar and the vectorized overloads of
// selectSmallAssignKernel() / selectLargeAssignKernel(). There it is instantiated with the
// target vector type (T1), the matrix operand type (T2) and the vector operand type (T3).
// Diagonal matrix types never select the vectorized kernels.
// NOTE(review): source lines 262-265 of the condition are missing from this listing.
258  template< typename T1, typename T2, typename T3 >
259  struct UseVectorizedDefaultKernel {
260  enum { value = !IsDiagonal<T2>::value &&
261  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
266  };
268  //**********************************************************************************************
269 
270  public:
271  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): source lines 272-276 are missing from this listing; ElementType and
// ResultType used below are presumably declared there - confirm against the original header.
277  typedef const ElementType ReturnType;  // Type returned by the subscript operator
278  typedef const ResultType CompositeType;  // Type used when this expression is an operand
279 
// Stored by value if MT is itself an expression, otherwise as a const reference.
281  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
282 
// Stored by value if VT is itself an expression, otherwise as a const reference.
284  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
285 
288 
291  //**********************************************************************************************
292 
293  //**Compilation flags***************************************************************************
// Compilation flag for vectorized (intrinsics based) evaluation of the expression. Diagonal
// matrix operands disable it; both operands have to be vectorizable themselves.
// NOTE(review): the remaining terms of this condition (source lines 297-299) are missing
// from this listing.
295  enum { vectorizable = !IsDiagonal<MT>::value &&
296  MT::vectorizable && VT::vectorizable &&
300 
302  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
303  !evaluateVector && VT::smpAssignable };
304  //**********************************************************************************************
305 
306  //**Constructor*********************************************************************************
312  explicit inline DMatDVecMultExpr( const MT& mat, const VT& vec )
313  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
314  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
315  {
316  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
317  }
318  //**********************************************************************************************
319 
320  //**Subscript operator**************************************************************************
326  inline ReturnType operator[]( size_t index ) const {
327  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
328 
329  if( ( IsStrictlyLower<MT>::value && index == 0UL ) ||
330  ( IsStrictlyUpper<MT>::value && index == mat_.rows()-1UL ) ||
331  mat_.columns() == 0UL )
332  return ElementType();
333 
335  return mat_(index,index) * vec_[index];
336 
337  const size_t jbegin( ( IsUpper<MT>::value )
338  ?( IsStrictlyUpper<MT>::value ? index+1UL : index )
339  :( 0UL ) );
340  const size_t jend( ( IsLower<MT>::value )
341  ?( IsStrictlyLower<MT>::value ? index : index+1UL )
342  :( mat_.columns() ) );
343  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
344 
345  const size_t jnum( jend - jbegin );
346  const size_t jpos( jbegin + ( ( jnum - 1UL ) & size_t(-2) ) + 1UL );
347 
348  ElementType res( mat_(index,jbegin) * vec_[jbegin] );
349 
350  for( size_t j=jbegin+1UL; j<jpos; j+=2UL ) {
351  res += mat_(index,j) * vec_[j] + mat_(index,j+1UL) * vec_[j+1UL];
352  }
353  if( jpos < jend ) {
354  res += mat_(index,jpos) * vec_[jpos];
355  }
356 
357  return res;
358  }
359  //**********************************************************************************************
360 
361  //**Size function*******************************************************************************
366  inline size_t size() const {
367  return mat_.rows();
368  }
369  //**********************************************************************************************
370 
371  //**Left operand access*************************************************************************
376  inline LeftOperand leftOperand() const {
377  return mat_;
378  }
379  //**********************************************************************************************
380 
381  //**Right operand access************************************************************************
386  inline RightOperand rightOperand() const {
387  return vec_;
388  }
389  //**********************************************************************************************
390 
391  //**********************************************************************************************
397  template< typename T >
398  inline bool canAlias( const T* alias ) const {
399  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
400  }
401  //**********************************************************************************************
402 
403  //**********************************************************************************************
409  template< typename T >
410  inline bool isAliased( const T* alias ) const {
411  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
412  }
413  //**********************************************************************************************
414 
415  //**********************************************************************************************
420  inline bool isAligned() const {
421  return mat_.isAligned() && vec_.isAligned();
422  }
423  //**********************************************************************************************
424 
425  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments. The visible part of the
// condition holds if BLAS is not parallel, if the matrix operand is a computation that is not
// flagged for evaluation, or if the matrix has fewer than DMATDVECMULT_THRESHOLD elements.
// NOTE(review): the final term of the return expression (source line 434) is missing from
// this listing, so the statement is incomplete as shown.
430  inline bool canSMPAssign() const {
431  return ( !BLAZE_BLAS_IS_PARALLEL ||
432  ( IsComputation<MT>::value && !evaluateMatrix ) ||
433  ( mat_.rows() * mat_.columns() < DMATDVECMULT_THRESHOLD ) ) &&
435  }
436  //**********************************************************************************************
437 
438  private:
439  //**Member variables****************************************************************************
440  LeftOperand mat_;  // Left-hand side dense matrix of the multiplication expression
441  RightOperand vec_;  // Right-hand side dense vector of the multiplication expression
442  //**********************************************************************************************
443 
444  //**Assignment to dense vectors*****************************************************************
// Assignment of a dense matrix/dense vector multiplication to a dense vector.
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// NOTE(review): source line 460 (first statement of the body) is missing from this listing.
// The types LT and RT are not declared in the visible part of the file.
457  template< typename VT1 > // Type of the target dense vector
458  friend inline void assign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
459  {
461 
462  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
463 
// Nothing to do for a matrix without rows; a matrix without columns resets the target.
464  if( rhs.mat_.rows() == 0UL ) {
465  return;
466  }
467  else if( rhs.mat_.columns() == 0UL ) {
468  reset( ~lhs );
469  return;
470  }
471 
472  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
473  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
474 
475  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
476  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
477  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
478  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
479 
// Hand the evaluated operands over to the kernel selection.
480  DMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
481  }
483  //**********************************************************************************************
484 
485  //**Assignment to dense vectors (kernel selection)**********************************************
496  template< typename VT1 // Type of the left-hand side target vector
497  , typename MT1 // Type of the left-hand side matrix operand
498  , typename VT2 > // Type of the right-hand side vector operand
499  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
500  {
501  if( ( IsDiagonal<MT1>::value ) ||
502  ( IsComputation<MT>::value && !evaluateMatrix ) ||
503  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
504  selectSmallAssignKernel( y, A, x );
505  else
506  selectBlasAssignKernel( y, A, x );
507  }
509  //**********************************************************************************************
510 
511  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized, non-BLAS) assignment kernel for y = A*x.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// The product expression A*x is handed to the assign() member of the target vector.
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   y.assign( A * x );
}
533  //**********************************************************************************************
534 
535  //**Default assignment to dense vectors (small matrices)****************************************
549  template< typename VT1 // Type of the left-hand side target vector
550  , typename MT1 // Type of the left-hand side matrix operand
551  , typename VT2 > // Type of the right-hand side vector operand
552  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
553  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
554  {
555  selectDefaultAssignKernel( y, A, x );
556  }
558  //**********************************************************************************************
559 
560  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized small-matrix assignment kernel for y = A*x.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// The rows of A are processed in blocks of 8, 4, 3 and 2 rows, followed by at most one single
// remaining row. For each block the products are accumulated in one intrinsic accumulator per
// row while stepping through the columns in strides of IT::size; the accumulators are then
// reduced via sum() and written to y.
// For upper matrices the column range starts at the (strictly upper: one past the) diagonal
// of the first row of the block, masked with size_t(-IT::size) (rounds down to a multiple of
// IT::size, assuming IT::size is a power of two). For lower matrices the range ends at the
// diagonal of the last row of the block.
// NOTE(review): the accumulators are default-constructed and immediately accumulated into;
// this presumably relies on IntrinsicType default-constructing to zero - confirm.
// NOTE(review): the column loops may step beyond jend when the range is not a multiple of
// IT::size; presumably the operands are padded accordingly - confirm.
574  template< typename VT1 // Type of the left-hand side target vector
575  , typename MT1 // Type of the left-hand side matrix operand
576  , typename VT2 > // Type of the right-hand side vector operand
577  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
578  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
579  {
580  typedef IntrinsicTrait<ElementType> IT;
581 
582  const size_t M( A.rows() );
583  const size_t N( A.columns() );
584 
585  size_t i( 0UL );
586 
// Blocks of eight rows
587  for( ; (i+8UL) <= M; i+=8UL )
588  {
589  const size_t jbegin( ( IsUpper<MT1>::value )
590  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
591  :( 0UL ) );
592  const size_t jend( ( IsLower<MT1>::value )
593  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
594  :( N ) );
595  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
596 
597  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
598 
599  for( size_t j=jbegin; j<jend; j+=IT::size ) {
600  const IntrinsicType x1( x.load(j) );
601  xmm1 = xmm1 + A.load(i ,j) * x1;
602  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
603  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
604  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
605  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
606  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
607  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
608  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
609  }
610 
611  y[i ] = sum( xmm1 );
612  y[i+1UL] = sum( xmm2 );
613  y[i+2UL] = sum( xmm3 );
614  y[i+3UL] = sum( xmm4 );
615  y[i+4UL] = sum( xmm5 );
616  y[i+5UL] = sum( xmm6 );
617  y[i+6UL] = sum( xmm7 );
618  y[i+7UL] = sum( xmm8 );
619  }
620 
// Blocks of four rows
621  for( ; (i+4UL) <= M; i+=4UL )
622  {
623  const size_t jbegin( ( IsUpper<MT1>::value )
624  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
625  :( 0UL ) );
626  const size_t jend( ( IsLower<MT1>::value )
627  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
628  :( N ) );
629  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
630 
631  IntrinsicType xmm1, xmm2, xmm3, xmm4;
632 
633  for( size_t j=jbegin; j<jend; j+=IT::size ) {
634  const IntrinsicType x1( x.load(j) );
635  xmm1 = xmm1 + A.load(i ,j) * x1;
636  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
637  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
638  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
639  }
640 
641  y[i ] = sum( xmm1 );
642  y[i+1UL] = sum( xmm2 );
643  y[i+2UL] = sum( xmm3 );
644  y[i+3UL] = sum( xmm4 );
645  }
646 
// Blocks of three rows
647  for( ; (i+3UL) <= M; i+=3UL )
648  {
649  const size_t jbegin( ( IsUpper<MT1>::value )
650  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
651  :( 0UL ) );
652  const size_t jend( ( IsLower<MT1>::value )
653  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
654  :( N ) );
655  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
656 
657  IntrinsicType xmm1, xmm2, xmm3;
658 
659  for( size_t j=jbegin; j<jend; j+=IT::size ) {
660  const IntrinsicType x1( x.load(j) );
661  xmm1 = xmm1 + A.load(i ,j) * x1;
662  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
663  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
664  }
665 
666  y[i ] = sum( xmm1 );
667  y[i+1UL] = sum( xmm2 );
668  y[i+2UL] = sum( xmm3 );
669  }
670 
// Blocks of two rows
671  for( ; (i+2UL) <= M; i+=2UL )
672  {
673  const size_t jbegin( ( IsUpper<MT1>::value )
674  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
675  :( 0UL ) );
676  const size_t jend( ( IsLower<MT1>::value )
677  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
678  :( N ) );
679  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
680 
681  IntrinsicType xmm1, xmm2;
682 
683  for( size_t j=jbegin; j<jend; j+=IT::size ) {
684  const IntrinsicType x1( x.load(j) );
685  xmm1 = xmm1 + A.load(i ,j) * x1;
686  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
687  }
688 
689  y[i ] = sum( xmm1 );
690  y[i+1UL] = sum( xmm2 );
691  }
692 
// Single remaining row
693  if( i < M )
694  {
695  const size_t jbegin( ( IsUpper<MT1>::value )
696  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
697  :( 0UL ) );
698  const size_t jend( ( IsLower<MT1>::value )
699  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
700  :( N ) );
701  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
702 
703  IntrinsicType xmm1;
704 
705  for( size_t j=jbegin; j<jend; j+=IT::size ) {
706  xmm1 = xmm1 + A.load(i,j) * x.load(j);
707  }
708 
709  y[i] = sum( xmm1 );
710  }
711  }
713  //**********************************************************************************************
714 
715  //**Default assignment to dense vectors (large matrices)****************************************
729  template< typename VT1 // Type of the left-hand side target vector
730  , typename MT1 // Type of the left-hand side matrix operand
731  , typename VT2 > // Type of the right-hand side vector operand
732  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
733  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
734  {
735  selectDefaultAssignKernel( y, A, x );
736  }
738  //**********************************************************************************************
739 
740  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized large-matrix assignment kernel for y = A*x.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// The target vector is reset first; all results are then accumulated directly into y.
// The rows of A are processed in blocks of 8, 4 and 2 rows, followed by at most one single
// remaining row. Within each block the columns are traversed with a 4x unrolled loop
// (4*IT::size columns per step), then a 2x unrolled loop, then at most one final step of
// IT::size columns. The column range [jbegin,jend) is restricted for upper and lower matrices
// in the same way as in the small-matrix kernel.
// NOTE(review): the final column step may load beyond jend when the range is not a multiple
// of IT::size; presumably the operands are padded accordingly - confirm.
754  template< typename VT1 // Type of the left-hand side target vector
755  , typename MT1 // Type of the left-hand side matrix operand
756  , typename VT2 > // Type of the right-hand side vector operand
757  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
758  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
759  {
760  typedef IntrinsicTrait<ElementType> IT;
761 
762  const size_t M( A.rows() );
763  const size_t N( A.columns() );
764 
// All kernels below accumulate with +=, so the target has to start from its reset state
765  reset( y );
766 
767  size_t i( 0UL );
768 
// Blocks of eight rows
769  for( ; (i+8UL) <= M; i+=8UL )
770  {
771  const size_t jbegin( ( IsUpper<MT1>::value )
772  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
773  :( 0UL ) );
774  const size_t jend( ( IsLower<MT1>::value )
775  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
776  :( N ) );
777  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
778 
779  size_t j( jbegin );
780 
781  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
782  const size_t j1( j+IT::size );
783  const size_t j2( j+IT::size*2UL );
784  const size_t j3( j+IT::size*3UL );
785  const IntrinsicType x1( x.load(j ) );
786  const IntrinsicType x2( x.load(j1) );
787  const IntrinsicType x3( x.load(j2) );
788  const IntrinsicType x4( x.load(j3) );
789  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
790  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
791  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
792  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
793  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 );
794  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 );
795  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 );
796  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 );
797  }
798 
799  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
800  const size_t j1( j+IT::size );
801  const IntrinsicType x1( x.load(j ) );
802  const IntrinsicType x2( x.load(j1) );
803  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
804  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
805  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
806  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
807  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 );
808  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 );
809  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 );
810  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 );
811  }
812 
813  if( j < jend ) {
814  const IntrinsicType x1( x.load(j) );
815  y[i ] += sum( A.load(i ,j) * x1 );
816  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
817  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
818  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
819  y[i+4UL] += sum( A.load(i+4UL,j) * x1 );
820  y[i+5UL] += sum( A.load(i+5UL,j) * x1 );
821  y[i+6UL] += sum( A.load(i+6UL,j) * x1 );
822  y[i+7UL] += sum( A.load(i+7UL,j) * x1 );
823  }
824  }
825 
// Blocks of four rows
826  for( ; (i+4UL) <= M; i+=4UL )
827  {
828  const size_t jbegin( ( IsUpper<MT1>::value )
829  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
830  :( 0UL ) );
831  const size_t jend( ( IsLower<MT1>::value )
832  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
833  :( N ) );
834  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
835 
836  size_t j( jbegin );
837 
838  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
839  const size_t j1( j+IT::size );
840  const size_t j2( j+IT::size*2UL );
841  const size_t j3( j+IT::size*3UL );
842  const IntrinsicType x1( x.load(j ) );
843  const IntrinsicType x2( x.load(j1) );
844  const IntrinsicType x3( x.load(j2) );
845  const IntrinsicType x4( x.load(j3) );
846  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
847  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
848  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
849  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
850  }
851 
852  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
853  const size_t j1( j+IT::size );
854  const IntrinsicType x1( x.load(j ) );
855  const IntrinsicType x2( x.load(j1) );
856  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
857  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
858  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
859  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
860  }
861 
862  if( j < jend ) {
863  const IntrinsicType x1( x.load(j) );
864  y[i ] += sum( A.load(i ,j) * x1 );
865  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
866  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
867  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
868  }
869  }
870 
// Blocks of two rows
871  for( ; (i+2UL) <= M; i+=2UL )
872  {
873  const size_t jbegin( ( IsUpper<MT1>::value )
874  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
875  :( 0UL ) );
876  const size_t jend( ( IsLower<MT1>::value )
877  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
878  :( N ) );
879  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
880 
881  size_t j( jbegin );
882 
883  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
884  const size_t j1( j+IT::size );
885  const size_t j2( j+IT::size*2UL );
886  const size_t j3( j+IT::size*3UL );
887  const IntrinsicType x1( x.load(j ) );
888  const IntrinsicType x2( x.load(j1) );
889  const IntrinsicType x3( x.load(j2) );
890  const IntrinsicType x4( x.load(j3) );
891  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
892  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
893  }
894 
895  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
896  const size_t j1( j+IT::size );
897  const IntrinsicType x1( x.load(j ) );
898  const IntrinsicType x2( x.load(j1) );
899  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
900  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
901  }
902 
903  if( j < jend ) {
904  const IntrinsicType x1( x.load(j) );
905  y[i ] += sum( A.load(i ,j) * x1 );
906  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
907  }
908  }
909 
// Single remaining row
910  if( i < M )
911  {
912  const size_t jbegin( ( IsUpper<MT1>::value )
913  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
914  :( 0UL ) );
915  const size_t jend( ( IsLower<MT1>::value )
916  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
917  :( N ) );
918  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
919 
920  size_t j( jbegin );
921 
922  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
923  const size_t j1( j+IT::size );
924  const size_t j2( j+IT::size*2UL );
925  const size_t j3( j+IT::size*3UL );
926  const IntrinsicType x1( x.load(j ) );
927  const IntrinsicType x2( x.load(j1) );
928  const IntrinsicType x3( x.load(j2) );
929  const IntrinsicType x4( x.load(j3) );
930  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 );
931  }
932 
933  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
934  const size_t j1( j+IT::size );
935  const IntrinsicType x1( x.load(j ) );
936  const IntrinsicType x2( x.load(j1) );
937  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 );
938  }
939 
940  if( j < jend ) {
941  const IntrinsicType x1( x.load(j) );
942  y[i] += sum( A.load(i,j) * x1 );
943  }
944  }
945  }
947  //**********************************************************************************************
948 
949  //**BLAS-based assignment to dense vectors (default)********************************************
963  template< typename VT1 // Type of the left-hand side target vector
964  , typename MT1 // Type of the left-hand side matrix operand
965  , typename VT2 > // Type of the right-hand side vector operand
966  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
967  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
968  {
969  selectLargeAssignKernel( y, A, x );
970  }
972  //**********************************************************************************************
973 
974  //**BLAS-based assignment to dense vectors (single precision)***********************************
975 #if BLAZE_BLAS_MODE
976 
989  template< typename VT1 // Type of the left-hand side target vector
990  , typename MT1 // Type of the left-hand side matrix operand
991  , typename VT2 > // Type of the right-hand side vector operand
992  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
993  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
994  {
995  if( IsTriangular<MT1>::value ) {
996  assign( y, x );
997  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
998  }
999  else {
1000  sgemv( y, A, x, 1.0F, 0.0F );
1001  }
1002  }
1004 #endif
1005  //**********************************************************************************************
1006 
1007  //**BLAS-based assignment to dense vectors (double precision)***********************************
1008 #if BLAZE_BLAS_MODE
1009 
1022  template< typename VT1 // Type of the left-hand side target vector
1023  , typename MT1 // Type of the left-hand side matrix operand
1024  , typename VT2 > // Type of the right-hand side vector operand
1025  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1026  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1027  {
1028  if( IsTriangular<MT1>::value ) {
1029  assign( y, x );
1030  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1031  }
1032  else {
1033  dgemv( y, A, x, 1.0, 0.0 );
1034  }
1035  }
1037 #endif
1038  //**********************************************************************************************
1039 
1040  //**BLAS-based assignment to dense vectors (single precision complex)***************************
1041 #if BLAZE_BLAS_MODE
1042 
1055  template< typename VT1 // Type of the left-hand side target vector
1056  , typename MT1 // Type of the left-hand side matrix operand
1057  , typename VT2 > // Type of the right-hand side vector operand
1058  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1059  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1060  {
1061  if( IsTriangular<MT1>::value ) {
1062  assign( y, x );
1063  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1064  }
1065  else {
1066  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1067  }
1068  }
1070 #endif
1071  //**********************************************************************************************
1072 
1073  //**BLAS-based assignment to dense vectors (double precision complex)***************************
1074 #if BLAZE_BLAS_MODE
1075 
1088  template< typename VT1 // Type of the left-hand side target vector
1089  , typename MT1 // Type of the left-hand side matrix operand
1090  , typename VT2 > // Type of the right-hand side vector operand
1091  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1092  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
1093  {
1094  if( IsTriangular<MT1>::value ) {
1095  assign( y, x );
1096  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1097  }
1098  else {
1099  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1100  }
1101  }
1103 #endif
1104  //**********************************************************************************************
1105 
1106  //**Assignment to sparse vectors****************************************************************
// Assignment of a dense matrix-dense vector multiplication to a sparse vector.
//
// lhs : the target sparse vector (accessed through operator~)
// rhs : the multiplication expression to be assigned
//
// The expression is first evaluated into a temporary of type ResultType via serial( rhs );
// the temporary is then assigned to the target vector.
// NOTE(review): the listing numbers jump inside this body (1121 -> 1123 -> 1127), so
// statements of the original source may be hidden here - check against the real header.
1119  template< typename VT1 > // Type of the target sparse vector
1120  friend inline void assign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1121  {
1123 
1127 
1128  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1129 
      // Evaluate the whole expression into a temporary before assigning it to the target
1130  const ResultType tmp( serial( rhs ) );
1131  assign( ~lhs, tmp );
1132  }
1134  //**********************************************************************************************
1135 
1136  //**Addition assignment to dense vectors********************************************************
// Addition assignment of a dense matrix-dense vector multiplication to a dense vector.
//
// lhs : the target dense vector (accessed through operator~)
// rhs : the multiplication expression to be added
//
// Returns without touching the target when the matrix operand has zero rows or zero columns.
// Otherwise both operands are evaluated via serial() into the local objects A (type LT) and
// x (type RT) and handed to selectAddAssignKernel().
// NOTE(review): the listing numbers jump inside this body (1151 -> 1153), so a statement of
// the original source may be hidden here - check against the real header.
1149  template< typename VT1 > // Type of the target dense vector
1150  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1151  {
1153 
1154  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1155 
      // Nothing to add for an empty matrix operand
1156  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1157  return;
1158  }
1159 
1160  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1161  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1162 
      // The evaluated operands must still have the dimensions of the original operands
1163  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1164  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1165  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1166  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1167 
      // Qualified call: a friend function is not a member, so the static kernel is named explicitly
1168  DMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1169  }
1171  //**********************************************************************************************
1172 
1173  //**Addition assignment to dense vectors (kernel selection)*************************************
1184  template< typename VT1 // Type of the left-hand side target vector
1185  , typename MT1 // Type of the left-hand side matrix operand
1186  , typename VT2 > // Type of the right-hand side vector operand
1187  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1188  {
1189  if( ( IsDiagonal<MT1>::value ) ||
1190  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1191  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1192  selectSmallAddAssignKernel( y, A, x );
1193  else
1194  selectBlasAddAssignKernel( y, A, x );
1195  }
1197  //**********************************************************************************************
1198 
1199  //**Default addition assignment to dense vectors************************************************
// Default addition assignment kernel.
//
// y : target vector of the addition assignment
// A : matrix operand
// x : vector operand
//
// Forms the product expression A * x and passes it to the addAssign() member function of
// the target vector.
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   y.addAssign( A * x );
}
1221  //**********************************************************************************************
1222 
1223  //**Default addition assignment to dense vectors (small matrices)*******************************
1237  template< typename VT1 // Type of the left-hand side target vector
1238  , typename MT1 // Type of the left-hand side matrix operand
1239  , typename VT2 > // Type of the right-hand side vector operand
1240  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1241  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1242  {
1243  selectDefaultAddAssignKernel( y, A, x );
1244  }
1246  //**********************************************************************************************
1247 
1248  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized default addition assignment kernel for small matrices (y += A * x).
//
// y : target vector, updated in place through operator[]
// A : matrix operand, read through load(row,column)
// x : vector operand, read through load(index)
//
// Participates in overload resolution (via EnableIf) only when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The rows of A are processed in blocks of
// 8, 4, 3 and 2 rows, followed by a single remaining row. Within a block one IntrinsicType
// accumulator per row collects the products A.load(i+k,j) * x.load(j) over the column range
// [jbegin,jend); afterwards sum() of each accumulator is added to the matching element of y.
//
// NOTE(review): the accumulators are declared without an initializer; this presumably relies
// on IntrinsicType's default constructor producing a zero value - confirm.
// NOTE(review): the column loops advance by IT::size without a remainder step, so the last
// load can reach beyond jend; presumably the operands are padded accordingly - confirm.
1262  template< typename VT1 // Type of the left-hand side target vector
1263  , typename MT1 // Type of the left-hand side matrix operand
1264  , typename VT2 > // Type of the right-hand side vector operand
1265  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1266  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1267  {
1268  typedef IntrinsicTrait<ElementType> IT;
1269 
1270  const size_t M( A.rows() );
1271  const size_t N( A.columns() );
1272 
1273  size_t i( 0UL );
1274 
      // Blocks of eight rows
1275  for( ; (i+8UL) <= M; i+=8UL )
1276  {
      // Upper matrices (IsUpper): start at the first row index of the block (one further for
      // strictly upper matrices), masked with size_t(-IT::size), which rounds down to a
      // multiple of IT::size provided IT::size is a power of two. Otherwise start at column 0.
1277  const size_t jbegin( ( IsUpper<MT1>::value )
1278  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1279  :( 0UL ) );
      // Lower matrices (IsLower): stop behind the last row index of the block (one earlier
      // for strictly lower matrices). Otherwise run over all N columns.
1280  const size_t jend( ( IsLower<MT1>::value )
1281  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
1282  :( N ) );
1283  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1284 
1285  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1286 
      // The loaded part of x is shared by all eight rows of the block
1287  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1288  const IntrinsicType x1( x.load(j) );
1289  xmm1 = xmm1 + A.load(i ,j) * x1;
1290  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1291  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1292  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1293  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1294  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1295  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1296  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
1297  }
1298 
      // Reduce each accumulator and add the result to the target element
1299  y[i ] += sum( xmm1 );
1300  y[i+1UL] += sum( xmm2 );
1301  y[i+2UL] += sum( xmm3 );
1302  y[i+3UL] += sum( xmm4 );
1303  y[i+4UL] += sum( xmm5 );
1304  y[i+5UL] += sum( xmm6 );
1305  y[i+6UL] += sum( xmm7 );
1306  y[i+7UL] += sum( xmm8 );
1307  }
1308 
      // Blocks of four rows (same scheme as above)
1309  for( ; (i+4UL) <= M; i+=4UL )
1310  {
1311  const size_t jbegin( ( IsUpper<MT1>::value )
1312  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1313  :( 0UL ) );
1314  const size_t jend( ( IsLower<MT1>::value )
1315  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
1316  :( N ) );
1317  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1318 
1319  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1320 
1321  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1322  const IntrinsicType x1( x.load(j) );
1323  xmm1 = xmm1 + A.load(i ,j) * x1;
1324  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1325  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1326  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1327  }
1328 
1329  y[i ] += sum( xmm1 );
1330  y[i+1UL] += sum( xmm2 );
1331  y[i+2UL] += sum( xmm3 );
1332  y[i+3UL] += sum( xmm4 );
1333  }
1334 
      // Blocks of three rows
1335  for( ; (i+3UL) <= M; i+=3UL )
1336  {
1337  const size_t jbegin( ( IsUpper<MT1>::value )
1338  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1339  :( 0UL ) );
1340  const size_t jend( ( IsLower<MT1>::value )
1341  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
1342  :( N ) );
1343  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1344 
1345  IntrinsicType xmm1, xmm2, xmm3;
1346 
1347  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1348  const IntrinsicType x1( x.load(j) );
1349  xmm1 = xmm1 + A.load(i ,j) * x1;
1350  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1351  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1352  }
1353 
1354  y[i ] += sum( xmm1 );
1355  y[i+1UL] += sum( xmm2 );
1356  y[i+2UL] += sum( xmm3 );
1357  }
1358 
      // Blocks of two rows
1359  for( ; (i+2UL) <= M; i+=2UL )
1360  {
1361  const size_t jbegin( ( IsUpper<MT1>::value )
1362  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1363  :( 0UL ) );
1364  const size_t jend( ( IsLower<MT1>::value )
1365  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
1366  :( N ) );
1367  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1368 
1369  IntrinsicType xmm1, xmm2;
1370 
1371  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1372  const IntrinsicType x1( x.load(j) );
1373  xmm1 = xmm1 + A.load(i ,j) * x1;
1374  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1375  }
1376 
1377  y[i ] += sum( xmm1 );
1378  y[i+1UL] += sum( xmm2 );
1379  }
1380 
      // At most one row is left at this point
1381  if( i < M )
1382  {
1383  const size_t jbegin( ( IsUpper<MT1>::value )
1384  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1385  :( 0UL ) );
1386  const size_t jend( ( IsLower<MT1>::value )
1387  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1388  :( N ) );
1389  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1390 
1391  IntrinsicType xmm1;
1392 
1393  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1394  xmm1 = xmm1 + A.load(i,j) * x.load(j);
1395  }
1396 
1397  y[i] += sum( xmm1 );
1398  }
1399  }
1401  //**********************************************************************************************
1402 
1403  //**Default addition assignment to dense vectors (large matrices)*******************************
1417  template< typename VT1 // Type of the left-hand side target vector
1418  , typename MT1 // Type of the left-hand side matrix operand
1419  , typename VT2 > // Type of the right-hand side vector operand
1420  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1421  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1422  {
1423  selectDefaultAddAssignKernel( y, A, x );
1424  }
1426  //**********************************************************************************************
1427 
1428  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized default addition assignment kernel for large matrices (y += A * x).
//
// y : target vector, updated in place through operator[]
// A : matrix operand, read through load(row,column)
// x : vector operand, read through load(index)
//
// Participates in overload resolution (via EnableIf) only when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The rows of A are processed in blocks of
// 8, 4 and 2 rows, followed by a single remaining row (there is no three-row block here).
// Within a row block the column range [jbegin,jend) is traversed in chunks of 4*IT::size,
// then 2*IT::size, then one final chunk of IT::size; the sum() of each chunk's products is
// added directly to the matching element of y instead of being kept in an accumulator.
//
// NOTE(review): the final chunk is loaded whenever j < jend, so the load can reach beyond
// jend; presumably the operands are padded accordingly - confirm.
1442  template< typename VT1 // Type of the left-hand side target vector
1443  , typename MT1 // Type of the left-hand side matrix operand
1444  , typename VT2 > // Type of the right-hand side vector operand
1445  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1446  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1447  {
1448  typedef IntrinsicTrait<ElementType> IT;
1449 
1450  const size_t M( A.rows() );
1451  const size_t N( A.columns() );
1452 
1453  size_t i( 0UL );
1454 
      // Blocks of eight rows
1455  for( ; (i+8UL) <= M; i+=8UL )
1456  {
      // Upper matrices (IsUpper): start at the first row index of the block (one further for
      // strictly upper matrices), masked with size_t(-IT::size), which rounds down to a
      // multiple of IT::size provided IT::size is a power of two. Otherwise start at column 0.
1457  const size_t jbegin( ( IsUpper<MT1>::value )
1458  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1459  :( 0UL ) );
      // Lower matrices (IsLower): stop behind the last row index of the block (one earlier
      // for strictly lower matrices). Otherwise run over all N columns.
1460  const size_t jend( ( IsLower<MT1>::value )
1461  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
1462  :( N ) );
1463  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1464 
1465  size_t j( jbegin );
1466 
      // Chunks of four intrinsic vectors per row
1467  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
1468  const size_t j1( j+IT::size );
1469  const size_t j2( j+IT::size*2UL );
1470  const size_t j3( j+IT::size*3UL );
1471  const IntrinsicType x1( x.load(j ) );
1472  const IntrinsicType x2( x.load(j1) );
1473  const IntrinsicType x3( x.load(j2) );
1474  const IntrinsicType x4( x.load(j3) );
1475  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
1476  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
1477  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
1478  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
1479  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 );
1480  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 );
1481  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 );
1482  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 );
1483  }
1484 
      // Chunks of two intrinsic vectors per row
1485  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
1486  const size_t j1( j+IT::size );
1487  const IntrinsicType x1( x.load(j ) );
1488  const IntrinsicType x2( x.load(j1) );
1489  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
1490  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
1491  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
1492  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
1493  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 );
1494  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 );
1495  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 );
1496  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 );
1497  }
1498 
      // Final single intrinsic vector per row
1499  if( j < jend ) {
1500  const IntrinsicType x1( x.load(j) );
1501  y[i ] += sum( A.load(i ,j) * x1 );
1502  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
1503  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
1504  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
1505  y[i+4UL] += sum( A.load(i+4UL,j) * x1 );
1506  y[i+5UL] += sum( A.load(i+5UL,j) * x1 );
1507  y[i+6UL] += sum( A.load(i+6UL,j) * x1 );
1508  y[i+7UL] += sum( A.load(i+7UL,j) * x1 );
1509  }
1510  }
1511 
      // Blocks of four rows (same column chunking as above)
1512  for( ; (i+4UL) <= M; i+=4UL )
1513  {
1514  const size_t jbegin( ( IsUpper<MT1>::value )
1515  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1516  :( 0UL ) );
1517  const size_t jend( ( IsLower<MT1>::value )
1518  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
1519  :( N ) );
1520  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1521 
1522  size_t j( jbegin );
1523 
1524  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
1525  const size_t j1( j+IT::size );
1526  const size_t j2( j+IT::size*2UL );
1527  const size_t j3( j+IT::size*3UL );
1528  const IntrinsicType x1( x.load(j ) );
1529  const IntrinsicType x2( x.load(j1) );
1530  const IntrinsicType x3( x.load(j2) );
1531  const IntrinsicType x4( x.load(j3) );
1532  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
1533  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
1534  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
1535  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
1536  }
1537 
1538  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
1539  const size_t j1( j+IT::size );
1540  const IntrinsicType x1( x.load(j ) );
1541  const IntrinsicType x2( x.load(j1) );
1542  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
1543  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
1544  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
1545  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
1546  }
1547 
1548  if( j < jend ) {
1549  const IntrinsicType x1( x.load(j) );
1550  y[i ] += sum( A.load(i ,j) * x1 );
1551  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
1552  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
1553  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
1554  }
1555  }
1556 
      // Blocks of two rows
1557  for( ; (i+2UL) <= M; i+=2UL )
1558  {
1559  const size_t jbegin( ( IsUpper<MT1>::value )
1560  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1561  :( 0UL ) );
1562  const size_t jend( ( IsLower<MT1>::value )
1563  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
1564  :( N ) );
1565  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1566 
1567  size_t j( jbegin );
1568 
1569  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
1570  const size_t j1( j+IT::size );
1571  const size_t j2( j+IT::size*2UL );
1572  const size_t j3( j+IT::size*3UL );
1573  const IntrinsicType x1( x.load(j ) );
1574  const IntrinsicType x2( x.load(j1) );
1575  const IntrinsicType x3( x.load(j2) );
1576  const IntrinsicType x4( x.load(j3) );
1577  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
1578  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
1579  }
1580 
1581  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
1582  const size_t j1( j+IT::size );
1583  const IntrinsicType x1( x.load(j ) );
1584  const IntrinsicType x2( x.load(j1) );
1585  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
1586  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
1587  }
1588 
1589  if( j < jend ) {
1590  const IntrinsicType x1( x.load(j) );
1591  y[i ] += sum( A.load(i ,j) * x1 );
1592  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
1593  }
1594  }
1595 
      // At most one row is left at this point
1596  if( i < M )
1597  {
1598  const size_t jbegin( ( IsUpper<MT1>::value )
1599  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1600  :( 0UL ) );
1601  const size_t jend( ( IsLower<MT1>::value )
1602  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1603  :( N ) );
1604  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1605 
1606  size_t j( jbegin );
1607 
1608  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
1609  const size_t j1( j+IT::size );
1610  const size_t j2( j+IT::size*2UL );
1611  const size_t j3( j+IT::size*3UL );
1612  const IntrinsicType x1( x.load(j ) );
1613  const IntrinsicType x2( x.load(j1) );
1614  const IntrinsicType x3( x.load(j2) );
1615  const IntrinsicType x4( x.load(j3) );
1616  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 );
1617  }
1618 
1619  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
1620  const size_t j1( j+IT::size );
1621  const IntrinsicType x1( x.load(j ) );
1622  const IntrinsicType x2( x.load(j1) );
1623  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 );
1624  }
1625 
1626  if( j < jend ) {
1627  const IntrinsicType x1( x.load(j) );
1628  y[i] += sum( A.load(i,j) * x1 );
1629  }
1630  }
1631  }
1633  //**********************************************************************************************
1634 
1635  //**BLAS-based addition assignment to dense vectors (default)***********************************
1649  template< typename VT1 // Type of the left-hand side target vector
1650  , typename MT1 // Type of the left-hand side matrix operand
1651  , typename VT2 > // Type of the right-hand side vector operand
1652  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1653  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1654  {
1655  selectLargeAddAssignKernel( y, A, x );
1656  }
1658  //**********************************************************************************************
1659 
1660  //**BLAS-based addition assignment to dense vectors (single precision)**************************
1661 #if BLAZE_BLAS_MODE
1662 
1675  template< typename VT1 // Type of the left-hand side target vector
1676  , typename MT1 // Type of the left-hand side matrix operand
1677  , typename VT2 > // Type of the right-hand side vector operand
1678  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1679  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1680  {
1681  if( IsTriangular<MT1>::value ) {
1682  typename VT1::ResultType tmp( x );
1683  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1684  addAssign( y, tmp );
1685  }
1686  else {
1687  sgemv( y, A, x, 1.0F, 1.0F );
1688  }
1689  }
1691 #endif
1692  //**********************************************************************************************
1693 
1694  //**BLAS-based addition assignment to dense vectors (double precision)**************************
1695 #if BLAZE_BLAS_MODE
1696 
1709  template< typename VT1 // Type of the left-hand side target vector
1710  , typename MT1 // Type of the left-hand side matrix operand
1711  , typename VT2 > // Type of the right-hand side vector operand
1712  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1713  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1714  {
1715  if( IsTriangular<MT1>::value ) {
1716  typename VT1::ResultType tmp( x );
1717  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1718  addAssign( y, tmp );
1719  }
1720  else {
1721  dgemv( y, A, x, 1.0, 1.0 );
1722  }
1723  }
1725 #endif
1726  //**********************************************************************************************
1727 
1728  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
1729 #if BLAZE_BLAS_MODE
1730 
1743  template< typename VT1 // Type of the left-hand side target vector
1744  , typename MT1 // Type of the left-hand side matrix operand
1745  , typename VT2 > // Type of the right-hand side vector operand
1746  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1747  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1748  {
1749  if( IsTriangular<MT1>::value ) {
1750  typename VT1::ResultType tmp( x );
1751  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1752  addAssign( y, tmp );
1753  }
1754  else {
1755  cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1756  }
1757  }
1759 #endif
1760  //**********************************************************************************************
1761 
1762  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
1763 #if BLAZE_BLAS_MODE
1764 
1777  template< typename VT1 // Type of the left-hand side target vector
1778  , typename MT1 // Type of the left-hand side matrix operand
1779  , typename VT2 > // Type of the right-hand side vector operand
1780  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1781  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1782  {
1783  if( IsTriangular<MT1>::value ) {
1784  typename VT1::ResultType tmp( x );
1785  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1786  addAssign( y, tmp );
1787  }
1788  else {
1789  zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1790  }
1791  }
1793 #endif
1794  //**********************************************************************************************
1795 
1796  //**Addition assignment to sparse vectors*******************************************************
1797  // No special implementation for the addition assignment to sparse vectors.
1798  //**********************************************************************************************
1799 
1800  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of a dense matrix-dense vector multiplication to a dense vector.
//
// lhs : the target dense vector (accessed through operator~)
// rhs : the multiplication expression to be subtracted
//
// Returns without touching the target when the matrix operand has zero rows or zero columns.
// Otherwise both operands are evaluated via serial() into the local objects A (type LT) and
// x (type RT) and handed to selectSubAssignKernel().
// NOTE(review): the listing numbers jump inside this body (1815 -> 1817), so a statement of
// the original source may be hidden here - check against the real header.
1813  template< typename VT1 > // Type of the target dense vector
1814  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
1815  {
1817 
1818  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1819 
      // Nothing to subtract for an empty matrix operand
1820  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1821  return;
1822  }
1823 
1824  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1825  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1826 
      // The evaluated operands must still have the dimensions of the original operands
1827  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1828  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1829  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1830  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1831 
      // Qualified call: a friend function is not a member, so the static kernel is named explicitly
1832  DMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1833  }
1835  //**********************************************************************************************
1836 
1837  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1848  template< typename VT1 // Type of the left-hand side target vector
1849  , typename MT1 // Type of the left-hand side matrix operand
1850  , typename VT2 > // Type of the right-hand side vector operand
1851  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1852  {
1853  if( ( IsDiagonal<MT1>::value ) ||
1854  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1855  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
1856  selectSmallSubAssignKernel( y, A, x );
1857  else
1858  selectBlasSubAssignKernel( y, A, x );
1859  }
1861  //**********************************************************************************************
1862 
1863  //**Default subtraction assignment to dense vectors*********************************************
// Default subtraction assignment kernel.
//
// y : target vector of the subtraction assignment
// A : matrix operand
// x : vector operand
//
// Forms the product expression A * x and passes it to the subAssign() member function of
// the target vector.
template< typename VT1    // Type of the left-hand side target vector
        , typename MT1    // Type of the left-hand side matrix operand
        , typename VT2 >  // Type of the right-hand side vector operand
static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
{
   y.subAssign( A * x );
}
1885  //**********************************************************************************************
1886 
1887  //**Default subtraction assignment to dense vectors (small matrices)****************************
1901  template< typename VT1 // Type of the left-hand side target vector
1902  , typename MT1 // Type of the left-hand side matrix operand
1903  , typename VT2 > // Type of the right-hand side vector operand
1904  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1905  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1906  {
1907  selectDefaultSubAssignKernel( y, A, x );
1908  }
1910  //**********************************************************************************************
1911 
1912  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized default subtraction assignment kernel for small matrices (y -= A * x).
//
// y : target vector, updated in place through operator[]
// A : matrix operand, read through load(row,column)
// x : vector operand, read through load(index)
//
// Participates in overload resolution (via EnableIf) only when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The rows of A are processed in blocks of
// 8, 4, 3 and 2 rows, followed by a single remaining row. Within a block one IntrinsicType
// accumulator per row collects the products A.load(i+k,j) * x.load(j) over the column range
// [jbegin,jend); afterwards sum() of each accumulator is subtracted from the matching
// element of y.
//
// NOTE(review): the accumulators are declared without an initializer; this presumably relies
// on IntrinsicType's default constructor producing a zero value - confirm.
// NOTE(review): the column loops advance by IT::size without a remainder step, so the last
// load can reach beyond jend; presumably the operands are padded accordingly - confirm.
1926  template< typename VT1 // Type of the left-hand side target vector
1927  , typename MT1 // Type of the left-hand side matrix operand
1928  , typename VT2 > // Type of the right-hand side vector operand
1929  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1930  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1931  {
1932  typedef IntrinsicTrait<ElementType> IT;
1933 
1934  const size_t M( A.rows() );
1935  const size_t N( A.columns() );
1936 
1937  size_t i( 0UL );
1938 
      // Blocks of eight rows
1939  for( ; (i+8UL) <= M; i+=8UL )
1940  {
      // Upper matrices (IsUpper): start at the first row index of the block (one further for
      // strictly upper matrices), masked with size_t(-IT::size), which rounds down to a
      // multiple of IT::size provided IT::size is a power of two. Otherwise start at column 0.
1941  const size_t jbegin( ( IsUpper<MT1>::value )
1942  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1943  :( 0UL ) );
      // Lower matrices (IsLower): stop behind the last row index of the block (one earlier
      // for strictly lower matrices). Otherwise run over all N columns.
1944  const size_t jend( ( IsLower<MT1>::value )
1945  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
1946  :( N ) );
1947  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1948 
1949  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1950 
      // The loaded part of x is shared by all eight rows of the block
1951  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1952  const IntrinsicType x1( x.load(j) );
1953  xmm1 = xmm1 + A.load(i ,j) * x1;
1954  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1955  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1956  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1957  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1958  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1959  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1960  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
1961  }
1962 
      // Reduce each accumulator and subtract the result from the target element
1963  y[i ] -= sum( xmm1 );
1964  y[i+1UL] -= sum( xmm2 );
1965  y[i+2UL] -= sum( xmm3 );
1966  y[i+3UL] -= sum( xmm4 );
1967  y[i+4UL] -= sum( xmm5 );
1968  y[i+5UL] -= sum( xmm6 );
1969  y[i+6UL] -= sum( xmm7 );
1970  y[i+7UL] -= sum( xmm8 );
1971  }
1972 
      // Blocks of four rows (same scheme as above)
1973  for( ; (i+4UL) <= M; i+=4UL )
1974  {
1975  const size_t jbegin( ( IsUpper<MT1>::value )
1976  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
1977  :( 0UL ) );
1978  const size_t jend( ( IsLower<MT1>::value )
1979  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
1980  :( N ) );
1981  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1982 
1983  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1984 
1985  for( size_t j=jbegin; j<jend; j+=IT::size ) {
1986  const IntrinsicType x1( x.load(j) );
1987  xmm1 = xmm1 + A.load(i ,j) * x1;
1988  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1989  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1990  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1991  }
1992 
1993  y[i ] -= sum( xmm1 );
1994  y[i+1UL] -= sum( xmm2 );
1995  y[i+2UL] -= sum( xmm3 );
1996  y[i+3UL] -= sum( xmm4 );
1997  }
1998 
      // Blocks of three rows
1999  for( ; (i+3UL) <= M; i+=3UL )
2000  {
2001  const size_t jbegin( ( IsUpper<MT1>::value )
2002  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2003  :( 0UL ) );
2004  const size_t jend( ( IsLower<MT1>::value )
2005  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
2006  :( N ) );
2007  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2008 
2009  IntrinsicType xmm1, xmm2, xmm3;
2010 
2011  for( size_t j=jbegin; j<jend; j+=IT::size ) {
2012  const IntrinsicType x1( x.load(j) );
2013  xmm1 = xmm1 + A.load(i ,j) * x1;
2014  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2015  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2016  }
2017 
2018  y[i ] -= sum( xmm1 );
2019  y[i+1UL] -= sum( xmm2 );
2020  y[i+2UL] -= sum( xmm3 );
2021  }
2022 
      // Blocks of two rows
2023  for( ; (i+2UL) <= M; i+=2UL )
2024  {
2025  const size_t jbegin( ( IsUpper<MT1>::value )
2026  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2027  :( 0UL ) );
2028  const size_t jend( ( IsLower<MT1>::value )
2029  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
2030  :( N ) );
2031  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2032 
2033  IntrinsicType xmm1, xmm2;
2034 
2035  for( size_t j=jbegin; j<jend; j+=IT::size ) {
2036  const IntrinsicType x1( x.load(j) );
2037  xmm1 = xmm1 + A.load(i ,j) * x1;
2038  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2039  }
2040 
2041  y[i ] -= sum( xmm1 );
2042  y[i+1UL] -= sum( xmm2 );
2043  }
2044 
      // At most one row is left at this point
2045  if( i < M )
2046  {
2047  const size_t jbegin( ( IsUpper<MT1>::value )
2048  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2049  :( 0UL ) );
2050  const size_t jend( ( IsLower<MT1>::value )
2051  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
2052  :( N ) );
2053  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2054 
2055  IntrinsicType xmm1;
2056 
2057  for( size_t j=jbegin; j<jend; j+=IT::size ) {
2058  xmm1 = xmm1 + A.load(i,j) * x.load(j);
2059  }
2060 
2061  y[i] -= sum( xmm1 );
2062  }
2063  }
2065  //**********************************************************************************************
2066 
2067  //**Default subtraction assignment to dense vectors (large matrices)****************************
2081  template< typename VT1 // Type of the left-hand side target vector
2082  , typename MT1 // Type of the left-hand side matrix operand
2083  , typename VT2 > // Type of the right-hand side vector operand
2084  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2085  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2086  {
2087  selectDefaultSubAssignKernel( y, A, x );
2088  }
2090  //**********************************************************************************************
2091 
2092  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
      // Vectorized large-matrix kernel computing y -= A * x with intrinsic loads. This overload is
      // selected (via EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> is true.
      //   y  target vector, updated in place
      //   A  matrix operand (M = A.rows(), N = A.columns())
      //   x  vector operand
      // Rows are processed in blocks of 8, then 4, then 2, then a single remaining row. Within
      // each row block the columns are traversed 4, 2 and finally 1 intrinsic vector (IT::size
      // elements) at a time; every partial product is reduced with sum() and subtracted from y.
      // NOTE(review): the column loops load whole intrinsic vectors even when fewer than IT::size
      // columns remain before jend; presumably A and x are padded accordingly - confirm.
2106  template< typename VT1 // Type of the left-hand side target vector
2107  , typename MT1 // Type of the left-hand side matrix operand
2108  , typename VT2 > // Type of the right-hand side vector operand
2109  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2110  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2111  {
2112  typedef IntrinsicTrait<ElementType> IT;
2113 
2114  const size_t M( A.rows() );
2115  const size_t N( A.columns() );
2116 
2117  size_t i( 0UL );
2118 
      // Row blocks of 8.
2119  for( ; (i+8UL) <= M; i+=8UL )
2120  {
      // Upper matrices: start at column i (i+1 if strictly upper), masked with size_t(-IT::size),
      // which rounds down to a multiple of IT::size (assuming IT::size is a power of two).
2121  const size_t jbegin( ( IsUpper<MT1>::value )
2122  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2123  :( 0UL ) );
      // Lower matrices: stop at column i+8 (i+7 if strictly lower), i.e. at the diagonal of the
      // last row of this block; otherwise traverse all N columns.
2124  const size_t jend( ( IsLower<MT1>::value )
2125  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
2126  :( N ) );
2127  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2128 
2129  size_t j( jbegin );
2130 
      // Four intrinsic vectors of x per iteration, reused for all eight rows.
2131  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
2132  const size_t j1( j+IT::size );
2133  const size_t j2( j+IT::size*2UL );
2134  const size_t j3( j+IT::size*3UL );
2135  const IntrinsicType x1( x.load(j ) );
2136  const IntrinsicType x2( x.load(j1) );
2137  const IntrinsicType x3( x.load(j2) );
2138  const IntrinsicType x4( x.load(j3) );
2139  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
2140  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
2141  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
2142  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
2143  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 );
2144  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 );
2145  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 );
2146  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 );
2147  }
2148 
      // Two intrinsic vectors per iteration.
2149  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
2150  const size_t j1( j+IT::size );
2151  const IntrinsicType x1( x.load(j ) );
2152  const IntrinsicType x2( x.load(j1) );
2153  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
2154  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
2155  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
2156  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
2157  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 );
2158  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 );
2159  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 );
2160  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 );
2161  }
2162 
      // At most one intrinsic vector is left.
2163  if( j < jend ) {
2164  const IntrinsicType x1( x.load(j) );
2165  y[i ] -= sum( A.load(i ,j) * x1 );
2166  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 );
2167  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 );
2168  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 );
2169  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 );
2170  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 );
2171  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 );
2172  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 );
2173  }
2174  }
2175 
      // Row blocks of 4; same column scheme, jend adjusted to i+4 (i+3 if strictly lower).
2176  for( ; (i+4UL) <= M; i+=4UL )
2177  {
2178  const size_t jbegin( ( IsUpper<MT1>::value )
2179  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2180  :( 0UL ) );
2181  const size_t jend( ( IsLower<MT1>::value )
2182  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
2183  :( N ) );
2184  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2185 
2186  size_t j( jbegin );
2187 
2188  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
2189  const size_t j1( j+IT::size );
2190  const size_t j2( j+IT::size*2UL );
2191  const size_t j3( j+IT::size*3UL );
2192  const IntrinsicType x1( x.load(j ) );
2193  const IntrinsicType x2( x.load(j1) );
2194  const IntrinsicType x3( x.load(j2) );
2195  const IntrinsicType x4( x.load(j3) );
2196  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
2197  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
2198  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
2199  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
2200  }
2201 
2202  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
2203  const size_t j1( j+IT::size );
2204  const IntrinsicType x1( x.load(j ) );
2205  const IntrinsicType x2( x.load(j1) );
2206  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
2207  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
2208  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
2209  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
2210  }
2211 
2212  if( j < jend ) {
2213  const IntrinsicType x1( x.load(j) );
2214  y[i ] -= sum( A.load(i ,j) * x1 );
2215  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 );
2216  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 );
2217  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 );
2218  }
2219  }
2220 
      // Row blocks of 2; jend adjusted to i+2 (i+1 if strictly lower).
2221  for( ; (i+2UL) <= M; i+=2UL )
2222  {
2223  const size_t jbegin( ( IsUpper<MT1>::value )
2224  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2225  :( 0UL ) );
2226  const size_t jend( ( IsLower<MT1>::value )
2227  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
2228  :( N ) );
2229  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2230 
2231  size_t j( jbegin );
2232 
2233  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
2234  const size_t j1( j+IT::size );
2235  const size_t j2( j+IT::size*2UL );
2236  const size_t j3( j+IT::size*3UL );
2237  const IntrinsicType x1( x.load(j ) );
2238  const IntrinsicType x2( x.load(j1) );
2239  const IntrinsicType x3( x.load(j2) );
2240  const IntrinsicType x4( x.load(j3) );
2241  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
2242  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
2243  }
2244 
2245  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
2246  const size_t j1( j+IT::size );
2247  const IntrinsicType x1( x.load(j ) );
2248  const IntrinsicType x2( x.load(j1) );
2249  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
2250  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
2251  }
2252 
2253  if( j < jend ) {
2254  const IntrinsicType x1( x.load(j) );
2255  y[i ] -= sum( A.load(i ,j) * x1 );
2256  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 );
2257  }
2258  }
2259 
      // Single remaining row (when M is odd); jend is i+1 (i if strictly lower).
2260  if( i < M )
2261  {
2262  const size_t jbegin( ( IsUpper<MT1>::value )
2263  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
2264  :( 0UL ) );
2265  const size_t jend( ( IsLower<MT1>::value )
2266  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
2267  :( N ) );
2268  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2269 
2270  size_t j( jbegin );
2271 
2272  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
2273  const size_t j1( j+IT::size );
2274  const size_t j2( j+IT::size*2UL );
2275  const size_t j3( j+IT::size*3UL );
2276  const IntrinsicType x1( x.load(j ) );
2277  const IntrinsicType x2( x.load(j1) );
2278  const IntrinsicType x3( x.load(j2) );
2279  const IntrinsicType x4( x.load(j3) );
2280  y[i] -= sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 );
2281  }
2282 
2283  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
2284  const size_t j1( j+IT::size );
2285  const IntrinsicType x1( x.load(j ) );
2286  const IntrinsicType x2( x.load(j1) );
2287  y[i] -= sum( A.load(i,j) * x1 + A.load(i,j1) * x2 );
2288  }
2289 
2290  if( j < jend ) {
2291  const IntrinsicType x1( x.load(j) );
2292  y[i] -= sum( A.load(i,j) * x1 );
2293  }
2294  }
2295  }
2297  //**********************************************************************************************
2298 
2299  //**BLAS-based subtraction assignment to dense vectors (default)********************************
      // Fallback of the BLAS-based subtraction assignment kernels. This overload is selected (via
      // EnableIf) when UseDefaultKernel<VT1,MT1,VT2> is true and forwards to
      // selectLargeSubAssignKernel().
      //   y  target vector, A  matrix operand, x  vector operand
2313  template< typename VT1 // Type of the left-hand side target vector
2314  , typename MT1 // Type of the left-hand side matrix operand
2315  , typename VT2 > // Type of the right-hand side vector operand
2316  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
2317  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2318  {
2319  selectLargeSubAssignKernel( y, A, x );
2320  }
2322  //**********************************************************************************************
2323 
2324  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
      // Only compiled when BLAZE_BLAS_MODE is non-zero; selected (via EnableIf) when
      // UseSinglePrecisionKernel<VT1,MT1,VT2> is true.
      //   y  target vector, A  matrix operand, x  vector operand
2325 #if BLAZE_BLAS_MODE
2326 
2339  template< typename VT1 // Type of the left-hand side target vector
2340  , typename MT1 // Type of the left-hand side matrix operand
2341  , typename VT2 > // Type of the right-hand side vector operand
2342  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
2343  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2344  {
      // Triangular A: copy x into a temporary, pass it to strmv() together with the lower/upper
      // flag, then subtract the temporary from y.
2345  if( IsTriangular<MT1>::value ) {
2346  typename VT1::ResultType tmp( x );
2347  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2348  subAssign( y, tmp );
2349  }
      // General A: sgemv() with the scalars -1 and 1 (presumably y = -1*A*x + 1*y - confirm
      // against the sgemv wrapper).
2350  else {
2351  sgemv( y, A, x, -1.0F, 1.0F );
2352  }
2353  }
2355 #endif
2356  //**********************************************************************************************
2357 
2358  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
      // Only compiled when BLAZE_BLAS_MODE is non-zero; selected (via EnableIf) when
      // UseDoublePrecisionKernel<VT1,MT1,VT2> is true.
      //   y  target vector, A  matrix operand, x  vector operand
2359 #if BLAZE_BLAS_MODE
2360 
2373  template< typename VT1 // Type of the left-hand side target vector
2374  , typename MT1 // Type of the left-hand side matrix operand
2375  , typename VT2 > // Type of the right-hand side vector operand
2376  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
2377  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2378  {
      // Triangular A: copy x into a temporary, pass it to dtrmv() together with the lower/upper
      // flag, then subtract the temporary from y.
2379  if( IsTriangular<MT1>::value ) {
2380  typename VT1::ResultType tmp( x );
2381  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2382  subAssign( y, tmp );
2383  }
      // General A: dgemv() with the scalars -1 and 1 (presumably y = -1*A*x + 1*y - confirm
      // against the dgemv wrapper).
2384  else {
2385  dgemv( y, A, x, -1.0, 1.0 );
2386  }
2387  }
2389 #endif
2390  //**********************************************************************************************
2391 
2392  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
      // Only compiled when BLAZE_BLAS_MODE is non-zero; selected (via EnableIf) when
      // UseSinglePrecisionComplexKernel<VT1,MT1,VT2> is true.
      //   y  target vector, A  matrix operand, x  vector operand
2393 #if BLAZE_BLAS_MODE
2394 
2407  template< typename VT1 // Type of the left-hand side target vector
2408  , typename MT1 // Type of the left-hand side matrix operand
2409  , typename VT2 > // Type of the right-hand side vector operand
2410  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2411  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2412  {
      // Triangular A: copy x into a temporary, pass it to ctrmv() together with the lower/upper
      // flag, then subtract the temporary from y.
2413  if( IsTriangular<MT1>::value ) {
2414  typename VT1::ResultType tmp( x );
2415  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2416  subAssign( y, tmp );
2417  }
      // General A: cgemv() with the complex scalars (-1,0) and (1,0) (presumably
      // y = -1*A*x + 1*y - confirm against the cgemv wrapper).
2418  else {
2419  cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2420  }
2421  }
2423 #endif
2424  //**********************************************************************************************
2425 
2426  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
      // Only compiled when BLAZE_BLAS_MODE is non-zero; selected (via EnableIf) when
      // UseDoublePrecisionComplexKernel<VT1,MT1,VT2> is true.
      //   y  target vector, A  matrix operand, x  vector operand
2427 #if BLAZE_BLAS_MODE
2428 
2441  template< typename VT1 // Type of the left-hand side target vector
2442  , typename MT1 // Type of the left-hand side matrix operand
2443  , typename VT2 > // Type of the right-hand side vector operand
2444  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2445  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2446  {
      // Triangular A: copy x into a temporary, pass it to ztrmv() together with the lower/upper
      // flag, then subtract the temporary from y.
2447  if( IsTriangular<MT1>::value ) {
2448  typename VT1::ResultType tmp( x );
2449  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2450  subAssign( y, tmp );
2451  }
      // General A: zgemv() with the complex scalars (-1,0) and (1,0) (presumably
      // y = -1*A*x + 1*y - confirm against the zgemv wrapper).
2452  else {
2453  zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2454  }
2455  }
2457 #endif
2458  //**********************************************************************************************
2459 
2460  //**Subtraction assignment to sparse vectors****************************************************
2461  // No special implementation for the subtraction assignment to sparse vectors.
2462  //**********************************************************************************************
2463 
2464  //**Multiplication assignment to dense vectors**************************************************
      // Multiplication assignment of a matrix/vector multiplication expression to a dense vector.
      // The expression is first evaluated serially into a temporary of type ResultType, which is
      // then passed to multAssign() together with the target.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be evaluated
      // NOTE(review): upstream lines 2480 and 2482-2484 are not present in this listing.
2477  template< typename VT1 > // Type of the target dense vector
2478  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2479  {
2481 
2485 
2486  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2487 
2488  const ResultType tmp( serial( rhs ) );
2489  multAssign( ~lhs, tmp );
2490  }
2492  //**********************************************************************************************
2493 
2494  //**Multiplication assignment to sparse vectors*************************************************
2495  // No special implementation for the multiplication assignment to sparse vectors.
2496  //**********************************************************************************************
2497 
2498  //**SMP assignment to dense vectors*************************************************************
      // SMP assignment of a matrix/vector multiplication expression to a dense vector. Only
      // enabled (via EnableIf) when UseSMPAssign<VT1> is true.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be assigned
2513  template< typename VT1 > // Type of the target dense vector
2514  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2515  smpAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2516  {
2518 
2519  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2520 
      // No rows: nothing to assign. No columns: the target is reset instead.
2521  if( rhs.mat_.rows() == 0UL ) {
2522  return;
2523  }
2524  else if( rhs.mat_.columns() == 0UL ) {
2525  reset( ~lhs );
2526  return;
2527  }
2528 
2529  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2530  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2531 
2532  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2533  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2534  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2535  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2536 
      // Re-issue the multiplication on the evaluated operands and hand it to smpAssign().
2537  smpAssign( ~lhs, A * x );
2538  }
2540  //**********************************************************************************************
2541 
2542  //**SMP assignment to sparse vectors************************************************************
      // SMP assignment of a matrix/vector multiplication expression to a sparse vector. Only
      // enabled (via EnableIf) when UseSMPAssign<VT1> is true. The expression is evaluated into a
      // temporary of type ResultType, which is then passed to smpAssign() together with the target.
      //   lhs  target sparse vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be assigned
2557  template< typename VT1 > // Type of the target sparse vector
2558  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2559  smpAssign( SparseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2560  {
2562 
2566 
2567  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2568 
2569  const ResultType tmp( rhs );
2570  smpAssign( ~lhs, tmp );
2571  }
2573  //**********************************************************************************************
2574 
2575  //**SMP addition assignment to dense vectors****************************************************
      // SMP addition assignment of a matrix/vector multiplication expression to a dense vector.
      // Only enabled (via EnableIf) when UseSMPAssign<VT1> is true.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be added
2590  template< typename VT1 > // Type of the target dense vector
2591  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2592  smpAddAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2593  {
2595 
2596  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2597 
      // An empty matrix (no rows or no columns) leaves the target untouched.
2598  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2599  return;
2600  }
2601 
2602  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2603  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2604 
2605  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2606  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2607  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2608  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2609 
      // Re-issue the multiplication on the evaluated operands and hand it to smpAddAssign().
2610  smpAddAssign( ~lhs, A * x );
2611  }
2613  //**********************************************************************************************
2614 
2615  //**SMP addition assignment to sparse vectors***************************************************
2616  // No special implementation for the SMP addition assignment to sparse vectors.
2617  //**********************************************************************************************
2618 
2619  //**SMP subtraction assignment to dense vectors*************************************************
      // SMP subtraction assignment of a matrix/vector multiplication expression to a dense vector.
      // Only enabled (via EnableIf) when UseSMPAssign<VT1> is true.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be subtracted
2634  template< typename VT1 > // Type of the target dense vector
2635  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2636  smpSubAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2637  {
2639 
2640  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2641 
      // An empty matrix (no rows or no columns) leaves the target untouched.
2642  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2643  return;
2644  }
2645 
2646  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2647  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2648 
2649  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2650  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2651  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2652  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2653 
      // Re-issue the multiplication on the evaluated operands and hand it to smpSubAssign().
2654  smpSubAssign( ~lhs, A * x );
2655  }
2657  //**********************************************************************************************
2658 
2659  //**SMP subtraction assignment to sparse vectors************************************************
2660  // No special implementation for the SMP subtraction assignment to sparse vectors.
2661  //**********************************************************************************************
2662 
2663  //**SMP multiplication assignment to dense vectors**********************************************
      // SMP multiplication assignment of a matrix/vector multiplication expression to a dense
      // vector. Only enabled (via EnableIf) when UseSMPAssign<VT1> is true. The expression is
      // evaluated into a temporary of type ResultType, which is then passed to smpMultAssign()
      // together with the target.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  multiplication expression to be multiplied
2678  template< typename VT1 > // Type of the target dense vector
2679  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2680  smpMultAssign( DenseVector<VT1,false>& lhs, const DMatDVecMultExpr& rhs )
2681  {
2683 
2687 
2688  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2689 
2690  const ResultType tmp( rhs );
2691  smpMultAssign( ~lhs, tmp );
2692  }
2694  //**********************************************************************************************
2695 
2696  //**SMP multiplication assignment to sparse vectors*********************************************
2697  // No special implementation for the SMP multiplication assignment to sparse vectors.
2698  //**********************************************************************************************
2699 
2700  //**Compile time checks*************************************************************************
2708  //**********************************************************************************************
2709 };
2710 //*************************************************************************************************
2711 
2712 
2713 
2714 
2715 //=================================================================================================
2716 //
2717 // DVECSCALARMULTEXPR SPECIALIZATION
2718 //
2719 //=================================================================================================
2720 
2721 //*************************************************************************************************
2729 template< typename MT // Type of the left-hand side dense matrix
2730  , typename VT // Type of the right-hand side dense vector
2731  , typename ST > // Type of the scalar value
2732 class DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >
2733  : public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
2734  , private VecScalarMultExpr
2735  , private Computation
2736 {
2737  private:
2738  //**Type definitions****************************************************************************
      // Private shorthand types used throughout this specialization.
2739  typedef DMatDVecMultExpr<MT,VT> MVM; // Type of the wrapped matrix/vector multiplication expression
2740  typedef typename MVM::ResultType RES; // Result type of the matrix/vector multiplication expression
2741  typedef typename MT::ResultType MRT; // Result type of the matrix operand
2742  typedef typename VT::ResultType VRT; // Result type of the vector operand
2743  typedef typename MRT::ElementType MET; // Element type of the matrix result type
2744  typedef typename VRT::ElementType VET; // Element type of the vector result type
2745  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
2746  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
2747  //**********************************************************************************************
2748 
2749  //**********************************************************************************************
      // Compile time switch selecting the type LT used for the matrix operand in the assignment
      // functions: when set, LT is the result type MRT, otherwise the composite type MCT. It is
      // set if MT is a computation whose element type equals the vector element type and is BLAS
      // compatible, or if MT requires an evaluation.
2751  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2752  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2753  //**********************************************************************************************
2754 
2755  //**********************************************************************************************
      // Compile time switch selecting the type RT used for the vector operand in the assignment
      // functions: when set, RT is the result type VRT, otherwise the composite type VCT. It is
      // set if VT is a computation or requires an evaluation. The second condition has to test
      // the vector type VT: testing the matrix type MT (as before) forced a copy of the vector
      // whenever the matrix needed evaluation and missed vectors that need evaluation themselves.
2757  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2758  //**********************************************************************************************
2759 
2760  //**********************************************************************************************
2762 
      // Helper structure gating the SMP assignment overloads. Its value is set when either
      // operand is evaluated (evaluateMatrix || evaluateVector). The template parameter T1 is not
      // used in the condition.
2765  template< typename T1 >
2766  struct UseSMPAssign {
2767  enum { value = ( evaluateMatrix || evaluateVector ) };
2768  };
2769  //**********************************************************************************************
2770 
2771  //**********************************************************************************************
2773 
      // Helper structure selecting the single precision BLAS kernel. The value is set only if
      // BLAS mode is active, T1 offers mutable and T2/T3 constant data access, T2 is not diagonal,
      // all three types are vectorizable with float elements, and the scalar type T4 is not
      // complex.
      //   T1 target vector type, T2 matrix type, T3 vector type, T4 scalar type
2776  template< typename T1, typename T2, typename T3, typename T4 >
2777  struct UseSinglePrecisionKernel {
2778  enum { value = BLAZE_BLAS_MODE &&
2779  HasMutableDataAccess<T1>::value &&
2780  HasConstDataAccess<T2>::value &&
2781  HasConstDataAccess<T3>::value &&
2782  !IsDiagonal<T2>::value &&
2783  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2784  IsFloat<typename T1::ElementType>::value &&
2785  IsFloat<typename T2::ElementType>::value &&
2786  IsFloat<typename T3::ElementType>::value &&
2787  !IsComplex<T4>::value };
2788  };
2789  //**********************************************************************************************
2790 
2791  //**********************************************************************************************
2793 
      // Helper structure selecting the double precision BLAS kernel. The value is set only if
      // BLAS mode is active, T1 offers mutable and T2/T3 constant data access, T2 is not diagonal,
      // all three types are vectorizable with double elements, and the scalar type T4 is not
      // complex.
      //   T1 target vector type, T2 matrix type, T3 vector type, T4 scalar type
2796  template< typename T1, typename T2, typename T3, typename T4 >
2797  struct UseDoublePrecisionKernel {
2798  enum { value = BLAZE_BLAS_MODE &&
2799  HasMutableDataAccess<T1>::value &&
2800  HasConstDataAccess<T2>::value &&
2801  HasConstDataAccess<T3>::value &&
2802  !IsDiagonal<T2>::value &&
2803  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2804  IsDouble<typename T1::ElementType>::value &&
2805  IsDouble<typename T2::ElementType>::value &&
2806  IsDouble<typename T3::ElementType>::value &&
2807  !IsComplex<T4>::value };
2808  };
2809  //**********************************************************************************************
2810 
2811  //**********************************************************************************************
2813 
      // Helper structure selecting the single precision complex BLAS kernel. The value is set
      // only if BLAS mode is active, T1 offers mutable and T2/T3 constant data access, T2 is not
      // diagonal, and all three types are vectorizable with element type complex<float>.
      //   T1 target vector type, T2 matrix type, T3 vector type
2816  template< typename T1, typename T2, typename T3 >
2817  struct UseSinglePrecisionComplexKernel {
2818  typedef complex<float> Type;
2819  enum { value = BLAZE_BLAS_MODE &&
2820  HasMutableDataAccess<T1>::value &&
2821  HasConstDataAccess<T2>::value &&
2822  HasConstDataAccess<T3>::value &&
2823  !IsDiagonal<T2>::value &&
2824  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2825  IsSame<typename T1::ElementType,Type>::value &&
2826  IsSame<typename T2::ElementType,Type>::value &&
2827  IsSame<typename T3::ElementType,Type>::value };
2828  };
2829  //**********************************************************************************************
2830 
2831  //**********************************************************************************************
2833 
      // Helper structure selecting the double precision complex BLAS kernel. The value is set
      // only if BLAS mode is active, T1 offers mutable and T2/T3 constant data access, T2 is not
      // diagonal, and all three types are vectorizable with element type complex<double>.
      //   T1 target vector type, T2 matrix type, T3 vector type
2836  template< typename T1, typename T2, typename T3 >
2837  struct UseDoublePrecisionComplexKernel {
2838  typedef complex<double> Type;
2839  enum { value = BLAZE_BLAS_MODE &&
2840  HasMutableDataAccess<T1>::value &&
2841  HasConstDataAccess<T2>::value &&
2842  HasConstDataAccess<T3>::value &&
2843  !IsDiagonal<T2>::value &&
2844  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2845  IsSame<typename T1::ElementType,Type>::value &&
2846  IsSame<typename T2::ElementType,Type>::value &&
2847  IsSame<typename T3::ElementType,Type>::value };
2848  };
2849  //**********************************************************************************************
2850 
2851  //**********************************************************************************************
2853 
      // Helper structure selecting the default (non-BLAS) kernel. The value is set if BLAS mode
      // is disabled or if none of the four BLAS kernel selectors applies.
      //   T1 target vector type, T2 matrix type, T3 vector type, T4 scalar type
2855  template< typename T1, typename T2, typename T3, typename T4 >
2856  struct UseDefaultKernel {
2857  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2858  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2859  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2860  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2861  };
2862  //**********************************************************************************************
2863 
2864  //**********************************************************************************************
2866 
      // Helper structure selecting the vectorized default kernel. The value is set only if T2 is
      // not diagonal, all three types are vectorizable, the element types of T1, T2 and T3 and the
      // scalar type T4 are all the same, and IntrinsicTrait reports both addition and
      // multiplication for that element type.
      //   T1 target vector type, T2 matrix type, T3 vector type, T4 scalar type
2869  template< typename T1, typename T2, typename T3, typename T4 >
2870  struct UseVectorizedDefaultKernel {
2871  enum { value = !IsDiagonal<T2>::value &&
2872  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2873  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2874  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2875  IsSame<typename T1::ElementType,T4>::value &&
2876  IntrinsicTrait<typename T1::ElementType>::addition &&
2877  IntrinsicTrait<typename T1::ElementType>::multiplication };
2878  };
2879  //**********************************************************************************************
2880 
2881  public:
2882  //**Type definitions****************************************************************************
      // Public type interface of the scaled matrix/vector multiplication expression.
2883  typedef DVecScalarMultExpr<MVM,ST,false> This; // Type of this expression instance
2884  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type obtained from MultTrait<RES,ST>
2885  typedef typename ResultType::TransposeType TransposeType; // Transpose type of the result type
2886  typedef typename ResultType::ElementType ElementType; // Element type of the result type
2887  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type of the element type
2888  typedef const ElementType ReturnType; // Return type of the subscript operator
2889  typedef const ResultType CompositeType; // Composite type: the (const) result type
2890 
      // Type of the left-hand side operand: the wrapped matrix/vector multiplication expression.
2892  typedef const DMatDVecMultExpr<MT,VT> LeftOperand;
2893 
      // Type of the right-hand side operand: the scalar.
2895  typedef ST RightOperand;
2896 
      // Type used for the matrix operand in the assignment functions: MRT if evaluateMatrix is
      // set, MCT otherwise.
2898  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT;
2899 
      // Type used for the vector operand in the assignment functions: VRT if evaluateVector is
      // set, VCT otherwise.
2901  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT;
2902  //**********************************************************************************************
2903 
2904  //**Compilation flags***************************************************************************
      // vectorizable: set if MT is not diagonal, both operands are vectorizable, the matrix,
      // vector and scalar types agree (MET == VET == ST), and IntrinsicTrait reports addition and
      // multiplication for MET.
2906  enum { vectorizable = !IsDiagonal<MT>::value &&
2907  MT::vectorizable && VT::vectorizable &&
2908  IsSame<MET,VET>::value &&
2909  IsSame<MET,ST>::value &&
2910  IntrinsicTrait<MET>::addition &&
2911  IntrinsicTrait<MET>::multiplication };
2912 
      // smpAssignable: set only if neither operand is evaluated and both operand types are SMP
      // assignable themselves.
2914  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2915  !evaluateVector && VT::smpAssignable };
2916  //**********************************************************************************************
2917 
2918  //**Constructor*********************************************************************************
      // Constructs the scaled expression from a matrix/vector multiplication expression and a
      // scalar; both arguments are stored in the members vector_ and scalar_.
      //   vector  the left-hand side matrix/vector multiplication expression
      //   scalar  the right-hand side scalar
2924  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2925  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2926  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2927  {}
2928  //**********************************************************************************************
2929 
2930  //**Subscript operator**************************************************************************
      // Returns element `index` of the wrapped expression multiplied by the scalar.
      //   index  access index; must be smaller than vector_.size() (checked by internal assert)
2936  inline ReturnType operator[]( size_t index ) const {
2937  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2938  return vector_[index] * scalar_;
2939  }
2940  //**********************************************************************************************
2941 
2942  //**Size function*******************************************************************************
      // Returns the size of the expression, i.e. the size of the wrapped multiplication expression.
2947  inline size_t size() const {
2948  return vector_.size();
2949  }
2950  //**********************************************************************************************
2951 
2952  //**Left operand access*************************************************************************
      // Returns the left-hand side operand: the wrapped matrix/vector multiplication expression.
2957  inline LeftOperand leftOperand() const {
2958  return vector_;
2959  }
2960  //**********************************************************************************************
2961 
2962  //**Right operand access************************************************************************
      // Returns the right-hand side operand: the scalar.
2967  inline RightOperand rightOperand() const {
2968  return scalar_;
2969  }
2970  //**********************************************************************************************
2971 
2972  //**********************************************************************************************
      // Returns whether the expression can alias with the given address; the query is delegated
      // to the wrapped multiplication expression.
      //   alias  the address to be checked
2978  template< typename T >
2979  inline bool canAlias( const T* alias ) const {
2980  return vector_.canAlias( alias );
2981  }
2982  //**********************************************************************************************
2983 
2984  //**********************************************************************************************
      // Returns whether the expression is aliased with the given address; the query is delegated
      // to the wrapped multiplication expression.
      //   alias  the address to be checked
2990  template< typename T >
2991  inline bool isAliased( const T* alias ) const {
2992  return vector_.isAliased( alias );
2993  }
2994  //**********************************************************************************************
2995 
2996  //**********************************************************************************************
      // Returns the result of isAligned() of the wrapped multiplication expression.
3001  inline bool isAligned() const {
3002  return vector_.isAligned();
3003  }
3004  //**********************************************************************************************
3005 
3006  //**********************************************************************************************
      // Returns whether the expression can be used in SMP assignments. The first operand of the
      // conjunction holds if the BLAS library is not parallel, or MT is a computation that is not
      // evaluated, or the matrix has fewer than DMATDVECMULT_THRESHOLD elements; the second
      // operand requires the vector size to exceed the SMP threshold.
      // NOTE(review): upstream line 3016 was missing from this listing, leaving the return
      // expression with a dangling `&&`. The second operand below is restored from the upstream
      // Blaze sources (SMP_DMATDVECMULT_THRESHOLD) - confirm against the original header.
3011  inline bool canSMPAssign() const {
3012  typename MVM::LeftOperand A( vector_.leftOperand() );
3013  return ( !BLAZE_BLAS_IS_PARALLEL ||
3014  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3015  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) ) &&
3016  ( size() > SMP_DMATDVECMULT_THRESHOLD );
3017  }
3018  //**********************************************************************************************
3019 
3020  private:
3021  //**Member variables****************************************************************************
3022  LeftOperand vector_; // Left-hand side operand: the matrix/vector multiplication expression
3023  RightOperand scalar_; // Right-hand side operand: the scalar
3024  //**********************************************************************************************
3025 
3026  //**Assignment to dense vectors*****************************************************************
      // Assignment of a scaled matrix/vector multiplication expression to a dense vector.
      //   lhs  target dense vector; must have the same size as rhs (checked by internal assert)
      //   rhs  scaled multiplication expression to be assigned
3038  template< typename VT1 > // Type of the target dense vector
3039  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3040  {
3042 
3043  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3044 
      // Matrix and vector operand of the wrapped multiplication expression.
3045  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3046  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3047 
      // No rows: nothing to assign. No columns: the target is reset instead.
3048  if( left.rows() == 0UL ) {
3049  return;
3050  }
3051  else if( left.columns() == 0UL ) {
3052  reset( ~lhs );
3053  return;
3054  }
3055 
3056  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3057  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3058 
3059  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3060  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3061  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3062  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3063 
      // Dispatch to the kernel selection with the evaluated operands and the scalar.
3064  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
3065  }
3066  //**********************************************************************************************
3067 
3068  //**Assignment to dense vectors (kernel selection)**********************************************
      // Selects the assignment kernel for the scaled matrix/vector multiplication. The small
      // kernel is used if the matrix is diagonal, or MT is a computation that is not evaluated,
      // or the matrix has fewer than DMATDVECMULT_THRESHOLD elements; otherwise the BLAS kernel
      // selection is used.
      //   y  target vector, A  matrix operand, x  vector operand, scalar  scaling factor
3079  template< typename VT1 // Type of the left-hand side target vector
3080  , typename MT1 // Type of the left-hand side matrix operand
3081  , typename VT2 // Type of the right-hand side vector operand
3082  , typename ST2 > // Type of the scalar value
3083  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3084  {
3085  if( ( IsDiagonal<MT1>::value ) ||
3086  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3087  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
3088  selectSmallAssignKernel( y, A, x, scalar );
3089  else
3090  selectBlasAssignKernel( y, A, x, scalar );
3091  }
3092  //**********************************************************************************************
3093 
3094  //**Default assignment to dense vectors*********************************************************
3108  template< typename VT1 // Type of the left-hand side target vector
3109  , typename MT1 // Type of the left-hand side matrix operand
3110  , typename VT2 // Type of the right-hand side vector operand
3111  , typename ST2 > // Type of the scalar value
3112  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3113  selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3114  {
3115  y.assign( A * x * scalar );
3116  }
3117  //**********************************************************************************************
3118 
3119  //**Default assignment to dense vectors (small matrices)****************************************
3133  template< typename VT1 // Type of the left-hand side target vector
3134  , typename MT1 // Type of the left-hand side matrix operand
3135  , typename VT2 // Type of the right-hand side vector operand
3136  , typename ST2 > // Type of the scalar value
3137  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3138  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3139  {
3140  selectDefaultAssignKernel( y, A, x, scalar );
3141  }
3142  //**********************************************************************************************
3143 
3144  //**Vectorized default assignment to dense vectors (small matrices)*****************************
/*!\brief Vectorized default assignment of a small scaled dense matrix-dense vector
//        multiplication (\f$ \vec{y}=s*A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The rows of \a A are processed in blocks of 8, 4, 3, 2 and finally 1 row. Within a block
// the products A.load(i,j) * x.load(j) are accumulated in one IntrinsicType per row; each
// accumulator is then reduced with sum(), multiplied by \a scalar and stored in \a y.
*/
3158  template< typename VT1 // Type of the left-hand side target vector
3159  , typename MT1 // Type of the left-hand side matrix operand
3160  , typename VT2 // Type of the right-hand side vector operand
3161  , typename ST2 > // Type of the scalar value
3162  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3163  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3164  {
3165  typedef IntrinsicTrait<ElementType> IT;
3166
3167  const size_t M( A.rows() );
3168  const size_t N( A.columns() );
3169
3170  size_t i( 0UL );
3171
      // Blocks of 8 rows
3172  for( ; (i+8UL) <= M; i+=8UL )
3173  {
      // Column range of the row block. Upper matrices start at column i (i+1 if strictly
      // upper), masked with size_t(-IT::size) - presumably rounding down to a multiple of
      // IT::size, assuming IT::size is a power of two. Lower matrices end at column i+8
      // (i+7 if strictly lower). All other matrices use the full range [0,N).
3174  const size_t jbegin( ( IsUpper<MT1>::value )
3175  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3176  :( 0UL ) );
3177  const size_t jend( ( IsLower<MT1>::value )
3178  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
3179  :( N ) );
3180  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3181
      // NOTE(review): the accumulators are default-constructed and read immediately; this
      // relies on IntrinsicType's default constructor producing zero - confirm.
3182  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3183
      // NOTE(review): j advances by IT::size, so the last load may reach beyond jend;
      // presumably the operands are padded accordingly - confirm.
3184  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3185  const IntrinsicType x1( x.load(j) );
3186  xmm1 = xmm1 + A.load(i ,j) * x1;
3187  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3188  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3189  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3190  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
3191  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
3192  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
3193  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
3194  }
3195
      // Reduce each accumulator, scale it and overwrite the target element.
3196  y[i ] = sum( xmm1 ) * scalar;
3197  y[i+1UL] = sum( xmm2 ) * scalar;
3198  y[i+2UL] = sum( xmm3 ) * scalar;
3199  y[i+3UL] = sum( xmm4 ) * scalar;
3200  y[i+4UL] = sum( xmm5 ) * scalar;
3201  y[i+5UL] = sum( xmm6 ) * scalar;
3202  y[i+6UL] = sum( xmm7 ) * scalar;
3203  y[i+7UL] = sum( xmm8 ) * scalar;
3204  }
3205
      // Blocks of 4 rows (same scheme as above)
3206  for( ; (i+4UL) <= M; i+=4UL )
3207  {
3208  const size_t jbegin( ( IsUpper<MT1>::value )
3209  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3210  :( 0UL ) );
3211  const size_t jend( ( IsLower<MT1>::value )
3212  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
3213  :( N ) );
3214  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3215
3216  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3217
3218  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3219  const IntrinsicType x1( x.load(j) );
3220  xmm1 = xmm1 + A.load(i ,j) * x1;
3221  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3222  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3223  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3224  }
3225
3226  y[i ] = sum( xmm1 ) * scalar;
3227  y[i+1UL] = sum( xmm2 ) * scalar;
3228  y[i+2UL] = sum( xmm3 ) * scalar;
3229  y[i+3UL] = sum( xmm4 ) * scalar;
3230  }
3231
      // Blocks of 3 rows
3232  for( ; (i+3UL) <= M; i+=3UL )
3233  {
3234  const size_t jbegin( ( IsUpper<MT1>::value )
3235  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3236  :( 0UL ) );
3237  const size_t jend( ( IsLower<MT1>::value )
3238  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
3239  :( N ) );
3240  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3241
3242  IntrinsicType xmm1, xmm2, xmm3;
3243
3244  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3245  const IntrinsicType x1( x.load(j) );
3246  xmm1 = xmm1 + A.load(i ,j) * x1;
3247  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3248  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3249  }
3250
3251  y[i ] = sum( xmm1 ) * scalar;
3252  y[i+1UL] = sum( xmm2 ) * scalar;
3253  y[i+2UL] = sum( xmm3 ) * scalar;
3254  }
3255
      // Blocks of 2 rows
3256  for( ; (i+2UL) <= M; i+=2UL )
3257  {
3258  const size_t jbegin( ( IsUpper<MT1>::value )
3259  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3260  :( 0UL ) );
3261  const size_t jend( ( IsLower<MT1>::value )
3262  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
3263  :( N ) );
3264  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3265
3266  IntrinsicType xmm1, xmm2;
3267
3268  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3269  const IntrinsicType x1( x.load(j) );
3270  xmm1 = xmm1 + A.load(i ,j) * x1;
3271  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3272  }
3273
3274  y[i ] = sum( xmm1 ) * scalar;
3275  y[i+1UL] = sum( xmm2 ) * scalar;
3276  }
3277
      // Remaining single row, if any
3278  if( i < M )
3279  {
3280  const size_t jbegin( ( IsUpper<MT1>::value )
3281  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3282  :( 0UL ) );
3283  const size_t jend( ( IsLower<MT1>::value )
3284  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
3285  :( N ) );
3286  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3287
3288  IntrinsicType xmm1;
3289
3290  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3291  xmm1 = xmm1 + A.load(i,j) * x.load(j);
3292  }
3293
3294  y[i] = sum( xmm1 ) * scalar;
3295  }
3296  }
3297  //**********************************************************************************************
3298 
3299  //**Default assignment to dense vectors (large matrices)****************************************
3313  template< typename VT1 // Type of the left-hand side target vector
3314  , typename MT1 // Type of the left-hand side matrix operand
3315  , typename VT2 // Type of the right-hand side vector operand
3316  , typename ST2 > // Type of the scalar value
3317  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3318  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3319  {
3320  selectDefaultAssignKernel( y, A, x, scalar );
3321  }
3322  //**********************************************************************************************
3323 
3324  //**Vectorized default assignment to dense vectors (large matrices)*****************************
/*!\brief Vectorized default assignment of a large scaled dense matrix-dense vector
//        multiplication (\f$ \vec{y}=s*A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The target vector is reset first. The rows of \a A are then processed in blocks of 8, 4, 2
// and finally 1 row. For every row block the columns are traversed in steps of 4*IT::size,
// then 2*IT::size, then a single IT::size remainder; the reduced partial sums are added
// directly to \a y and the affected elements are multiplied by \a scalar afterwards.
*/
3338  template< typename VT1 // Type of the left-hand side target vector
3339  , typename MT1 // Type of the left-hand side matrix operand
3340  , typename VT2 // Type of the right-hand side vector operand
3341  , typename ST2 > // Type of the scalar value
3342  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3343  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3344  {
3345  typedef IntrinsicTrait<ElementType> IT;
3346
3347  const size_t M( A.rows() );
3348  const size_t N( A.columns() );
3349
      // The partial sums below are accumulated with +=, hence y must start out reset.
3350  reset( y );
3351
3352  size_t i( 0UL );
3353
      // Blocks of 8 rows
3354  for( ; (i+8UL) <= M; i+=8UL )
3355  {
      // Column range of the row block. Upper matrices start at column i (i+1 if strictly
      // upper), masked with size_t(-IT::size) - presumably rounding down to a multiple of
      // IT::size, assuming IT::size is a power of two. Lower matrices end at column i+8
      // (i+7 if strictly lower). All other matrices use the full range [0,N).
3356  const size_t jbegin( ( IsUpper<MT1>::value )
3357  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3358  :( 0UL ) );
3359  const size_t jend( ( IsLower<MT1>::value )
3360  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
3361  :( N ) );
3362  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3363
3364  size_t j( jbegin );
3365
      // Four vector loads of x per iteration
      // NOTE(review): the loads may reach beyond jend; presumably the operands are padded
      // accordingly - confirm.
3366  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
3367  const size_t j1( j+IT::size );
3368  const size_t j2( j+IT::size*2UL );
3369  const size_t j3( j+IT::size*3UL );
3370  const IntrinsicType x1( x.load(j ) );
3371  const IntrinsicType x2( x.load(j1) );
3372  const IntrinsicType x3( x.load(j2) );
3373  const IntrinsicType x4( x.load(j3) );
3374  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
3375  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
3376  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
3377  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
3378  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 );
3379  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 );
3380  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 );
3381  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 );
3382  }
3383
      // Two vector loads of x per iteration
3384  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
3385  const size_t j1( j+IT::size );
3386  const IntrinsicType x1( x.load(j ) );
3387  const IntrinsicType x2( x.load(j1) );
3388  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
3389  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
3390  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
3391  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
3392  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 );
3393  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 );
3394  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 );
3395  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 );
3396  }
3397
      // Remaining single vector load, if any
3398  if( j < jend ) {
3399  const IntrinsicType x1( x.load(j) );
3400  y[i ] += sum( A.load(i ,j) * x1 );
3401  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
3402  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
3403  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
3404  y[i+4UL] += sum( A.load(i+4UL,j) * x1 );
3405  y[i+5UL] += sum( A.load(i+5UL,j) * x1 );
3406  y[i+6UL] += sum( A.load(i+6UL,j) * x1 );
3407  y[i+7UL] += sum( A.load(i+7UL,j) * x1 );
3408  }
3409
      // Scale the finished rows of this block.
3410  y[i ] *= scalar;
3411  y[i+1UL] *= scalar;
3412  y[i+2UL] *= scalar;
3413  y[i+3UL] *= scalar;
3414  y[i+4UL] *= scalar;
3415  y[i+5UL] *= scalar;
3416  y[i+6UL] *= scalar;
3417  y[i+7UL] *= scalar;
3418  }
3419
      // Blocks of 4 rows (same scheme as above)
3420  for( ; (i+4UL) <= M; i+=4UL )
3421  {
3422  const size_t jbegin( ( IsUpper<MT1>::value )
3423  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3424  :( 0UL ) );
3425  const size_t jend( ( IsLower<MT1>::value )
3426  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
3427  :( N ) );
3428  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3429
3430  size_t j( jbegin );
3431
3432  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
3433  const size_t j1( j+IT::size );
3434  const size_t j2( j+IT::size*2UL );
3435  const size_t j3( j+IT::size*3UL );
3436  const IntrinsicType x1( x.load(j ) );
3437  const IntrinsicType x2( x.load(j1) );
3438  const IntrinsicType x3( x.load(j2) );
3439  const IntrinsicType x4( x.load(j3) );
3440  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
3441  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
3442  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 );
3443  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 );
3444  }
3445
3446  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
3447  const size_t j1( j+IT::size );
3448  const IntrinsicType x1( x.load(j ) );
3449  const IntrinsicType x2( x.load(j1) );
3450  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
3451  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
3452  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 );
3453  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 );
3454  }
3455
3456  if( j < jend ) {
3457  const IntrinsicType x1( x.load(j) );
3458  y[i ] += sum( A.load(i ,j) * x1 );
3459  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
3460  y[i+2UL] += sum( A.load(i+2UL,j) * x1 );
3461  y[i+3UL] += sum( A.load(i+3UL,j) * x1 );
3462  }
3463
3464  y[i ] *= scalar;
3465  y[i+1UL] *= scalar;
3466  y[i+2UL] *= scalar;
3467  y[i+3UL] *= scalar;
3468  }
3469
      // Blocks of 2 rows
3470  for( ; (i+2UL) <= M; i+=2UL )
3471  {
3472  const size_t jbegin( ( IsUpper<MT1>::value )
3473  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3474  :( 0UL ) );
3475  const size_t jend( ( IsLower<MT1>::value )
3476  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
3477  :( N ) );
3478  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3479
3480  size_t j( jbegin );
3481
3482  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
3483  const size_t j1( j+IT::size );
3484  const size_t j2( j+IT::size*2UL );
3485  const size_t j3( j+IT::size*3UL );
3486  const IntrinsicType x1( x.load(j ) );
3487  const IntrinsicType x2( x.load(j1) );
3488  const IntrinsicType x3( x.load(j2) );
3489  const IntrinsicType x4( x.load(j3) );
3490  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 );
3491  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 );
3492  }
3493
3494  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
3495  const size_t j1( j+IT::size );
3496  const IntrinsicType x1( x.load(j ) );
3497  const IntrinsicType x2( x.load(j1) );
3498  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 );
3499  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 );
3500  }
3501
3502  if( j < jend ) {
3503  const IntrinsicType x1( x.load(j) );
3504  y[i ] += sum( A.load(i ,j) * x1 );
3505  y[i+1UL] += sum( A.load(i+1UL,j) * x1 );
3506  }
3507
3508  y[i ] *= scalar;
3509  y[i+1UL] *= scalar;
3510  }
3511
      // Remaining single row, if any
3512  if( i < M )
3513  {
3514  const size_t jbegin( ( IsUpper<MT1>::value )
3515  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3516  :( 0UL ) );
3517  const size_t jend( ( IsLower<MT1>::value )
3518  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
3519  :( N ) );
3520  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3521
3522  size_t j( jbegin );
3523
3524  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
3525  const size_t j1( j+IT::size );
3526  const size_t j2( j+IT::size*2UL );
3527  const size_t j3( j+IT::size*3UL );
3528  const IntrinsicType x1( x.load(j ) );
3529  const IntrinsicType x2( x.load(j1) );
3530  const IntrinsicType x3( x.load(j2) );
3531  const IntrinsicType x4( x.load(j3) );
3532  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 );
3533  }
3534
3535  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
3536  const size_t j1( j+IT::size );
3537  const IntrinsicType x1( x.load(j ) );
3538  const IntrinsicType x2( x.load(j1) );
3539  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 );
3540  }
3541
3542  if( j < jend ) {
3543  const IntrinsicType x1( x.load(j) );
3544  y[i] += sum( A.load(i,j) * x1 );
3545  }
3546
3547  y[i] *= scalar;
3548  }
3549  }
3550  //**********************************************************************************************
3551 
3552  //**BLAS-based assignment to dense vectors (default)********************************************
3566  template< typename VT1 // Type of the left-hand side target vector
3567  , typename MT1 // Type of the left-hand side matrix operand
3568  , typename VT2 // Type of the right-hand side vector operand
3569  , typename ST2 > // Type of the scalar value
3570  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3571  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3572  {
3573  selectLargeAssignKernel( y, A, x, scalar );
3574  }
3575  //**********************************************************************************************
3576 
3577  //**BLAS-based assignment to dense vectors (single precision)***********************************
3578 #if BLAZE_BLAS_MODE
3579 
3592  template< typename VT1 // Type of the left-hand side target vector
3593  , typename MT1 // Type of the left-hand side matrix operand
3594  , typename VT2 // Type of the right-hand side vector operand
3595  , typename ST2 > // Type of the scalar value
3596  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3597  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3598  {
3599  if( IsTriangular<MT1>::value ) {
3600  assign( y, scalar * x );
3601  strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3602  }
3603  else {
3604  sgemv( y, A, x, scalar, 0.0F );
3605  }
3606  }
3607 #endif
3608  //**********************************************************************************************
3609 
3610  //**BLAS-based assignment to dense vectors (double precision)***********************************
3611 #if BLAZE_BLAS_MODE
3612 
3625  template< typename VT1 // Type of the left-hand side target vector
3626  , typename MT1 // Type of the left-hand side matrix operand
3627  , typename VT2 // Type of the right-hand side vector operand
3628  , typename ST2 > // Type of the scalar value
3629  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3630  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3631  {
3632  if( IsTriangular<MT1>::value ) {
3633  assign( y, scalar * x );
3634  dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3635  }
3636  else {
3637  dgemv( y, A, x, scalar, 0.0 );
3638  }
3639  }
3640 #endif
3641  //**********************************************************************************************
3642 
3643  //**BLAS-based assignment to dense vectors (single precision complex)***************************
3644 #if BLAZE_BLAS_MODE
3645 
3658  template< typename VT1 // Type of the left-hand side target vector
3659  , typename MT1 // Type of the left-hand side matrix operand
3660  , typename VT2 // Type of the right-hand side vector operand
3661  , typename ST2 > // Type of the scalar value
3662  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3663  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3664  {
3665  if( IsTriangular<MT1>::value ) {
3666  assign( y, scalar * x );
3667  ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3668  }
3669  else {
3670  cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3671  }
3672  }
3673 #endif
3674  //**********************************************************************************************
3675 
3676  //**BLAS-based assignment to dense vectors (double precision complex)***************************
3677 #if BLAZE_BLAS_MODE
3678 
3691  template< typename VT1 // Type of the left-hand side target vector
3692  , typename MT1 // Type of the left-hand side matrix operand
3693  , typename VT2 // Type of the right-hand side vector operand
3694  , typename ST2 > // Type of the scalar value
3695  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3696  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3697  {
3698  if( IsTriangular<MT1>::value ) {
3699  assign( y, scalar * x );
3700  ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3701  }
3702  else {
3703  zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3704  }
3705  }
3706 #endif
3707  //**********************************************************************************************
3708 
3709  //**Assignment to sparse vectors****************************************************************
/*!\brief Assignment of a scaled dense matrix-dense vector multiplication to a sparse vector.
//
// \param lhs The target left-hand side sparse vector.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
//
// The expression is first evaluated into a temporary of type \a ResultType (constructed from
// serial( rhs )); that temporary is then assigned to the sparse target.
*/
3721  template< typename VT1 > // Type of the target sparse vector
3722  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3723  {
      // NOTE(review): listing lines 3724 and 3726-3728 are not visible here (presumably
      // macro lines dropped by the documentation generator) - confirm against the real
      // header.
3725
3729
3730  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3731
3732  const ResultType tmp( serial( rhs ) );
3733  assign( ~lhs, tmp );
3734  }
3735  //**********************************************************************************************
3736 
3737  //**Addition assignment to dense vectors********************************************************
/*!\brief Addition assignment of a scaled dense matrix-dense vector multiplication to a dense
//        vector (\f$ \vec{y}+=s*A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side scaled multiplication expression to be added.
// \return void
//
// The matrix and vector operands are obtained from \a rhs.vector_, passed through serial()
// to construct the \a LT and \a RT operands, and then handed to selectAddAssignKernel()
// together with \a rhs.scalar_.
*/
3749  template< typename VT1 > // Type of the target dense vector
3750  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3751  {
      // NOTE(review): listing line 3752 is not visible here (presumably a macro line
      // dropped by the documentation generator) - confirm against the real header.
3753
3754  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3755
3756  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3757  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3758
      // A matrix without rows or without columns leaves the target untouched.
3759  if( left.rows() == 0UL || left.columns() == 0UL ) {
3760  return;
3761  }
3762
3763  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3764  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3765
3766  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3767  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3768  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3769  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3770
      // Dispatch to the small-matrix or BLAS kernel selection.
3771  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3772  }
3773  //**********************************************************************************************
3774 
3775  //**Addition assignment to dense vectors (kernel selection)*************************************
3786  template< typename VT1 // Type of the left-hand side target vector
3787  , typename MT1 // Type of the left-hand side matrix operand
3788  , typename VT2 // Type of the right-hand side vector operand
3789  , typename ST2 > // Type of the scalar value
3790  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3791  {
3792  if( ( IsDiagonal<MT1>::value ) ||
3793  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3794  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
3795  selectSmallAddAssignKernel( y, A, x, scalar );
3796  else
3797  selectBlasAddAssignKernel( y, A, x, scalar );
3798  }
3799  //**********************************************************************************************
3800 
3801  //**Default addition assignment to dense vectors************************************************
/*!\brief Default addition assignment of a scaled dense matrix-dense vector multiplication
//        (\f$ \vec{y}+=s*A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The scaled product expression is handed to the addAssign() member function of the target
// vector.
*/
template< typename VT1, typename MT1, typename VT2, typename ST2 >
static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   y.addAssign( ( A * x ) * scalar );
}
3823  //**********************************************************************************************
3824 
3825  //**Default addition assignment to dense vectors (small matrices)*******************************
3839  template< typename VT1 // Type of the left-hand side target vector
3840  , typename MT1 // Type of the left-hand side matrix operand
3841  , typename VT2 // Type of the right-hand side vector operand
3842  , typename ST2 > // Type of the scalar value
3843  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3844  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3845  {
3846  selectDefaultAddAssignKernel( y, A, x, scalar );
3847  }
3848  //**********************************************************************************************
3849 
3850  //**Vectorized default addition assignment to dense vectors (small matrices)********************
/*!\brief Vectorized default addition assignment of a small scaled dense matrix-dense vector
//        multiplication (\f$ \vec{y}+=s*A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The rows of \a A are processed in blocks of 8, 4, 3, 2 and finally 1 row. Within a block
// the products A.load(i,j) * x.load(j) are accumulated in one IntrinsicType per row; each
// accumulator is then reduced with sum(), multiplied by \a scalar and added to the
// corresponding element of \a y.
*/
3864  template< typename VT1 // Type of the left-hand side target vector
3865  , typename MT1 // Type of the left-hand side matrix operand
3866  , typename VT2 // Type of the right-hand side vector operand
3867  , typename ST2 > // Type of the scalar value
3868  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3869  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3870  {
3871  typedef IntrinsicTrait<ElementType> IT;
3872
3873  const size_t M( A.rows() );
3874  const size_t N( A.columns() );
3875
3876  size_t i( 0UL );
3877
      // Blocks of 8 rows
3878  for( ; (i+8UL) <= M; i+=8UL )
3879  {
      // Column range of the row block. Upper matrices start at column i (i+1 if strictly
      // upper), masked with size_t(-IT::size) - presumably rounding down to a multiple of
      // IT::size, assuming IT::size is a power of two. Lower matrices end at column i+8
      // (i+7 if strictly lower). All other matrices use the full range [0,N).
3880  const size_t jbegin( ( IsUpper<MT1>::value )
3881  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3882  :( 0UL ) );
3883  const size_t jend( ( IsLower<MT1>::value )
3884  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
3885  :( N ) );
3886  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3887
      // NOTE(review): the accumulators are default-constructed and read immediately; this
      // relies on IntrinsicType's default constructor producing zero - confirm.
3888  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3889
      // NOTE(review): j advances by IT::size, so the last load may reach beyond jend;
      // presumably the operands are padded accordingly - confirm.
3890  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3891  const IntrinsicType x1( x.load(j) );
3892  xmm1 = xmm1 + A.load(i ,j) * x1;
3893  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3894  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3895  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3896  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
3897  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
3898  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
3899  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
3900  }
3901
      // Reduce each accumulator, scale it and add it to the target element.
3902  y[i ] += sum( xmm1 ) * scalar;
3903  y[i+1UL] += sum( xmm2 ) * scalar;
3904  y[i+2UL] += sum( xmm3 ) * scalar;
3905  y[i+3UL] += sum( xmm4 ) * scalar;
3906  y[i+4UL] += sum( xmm5 ) * scalar;
3907  y[i+5UL] += sum( xmm6 ) * scalar;
3908  y[i+6UL] += sum( xmm7 ) * scalar;
3909  y[i+7UL] += sum( xmm8 ) * scalar;
3910  }
3911
      // Blocks of 4 rows (same scheme as above)
3912  for( ; (i+4UL) <= M; i+=4UL )
3913  {
3914  const size_t jbegin( ( IsUpper<MT1>::value )
3915  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3916  :( 0UL ) );
3917  const size_t jend( ( IsLower<MT1>::value )
3918  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
3919  :( N ) );
3920  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3921
3922  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3923
3924  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3925  const IntrinsicType x1( x.load(j) );
3926  xmm1 = xmm1 + A.load(i ,j) * x1;
3927  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3928  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3929  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3930  }
3931
3932  y[i ] += sum( xmm1 ) * scalar;
3933  y[i+1UL] += sum( xmm2 ) * scalar;
3934  y[i+2UL] += sum( xmm3 ) * scalar;
3935  y[i+3UL] += sum( xmm4 ) * scalar;
3936  }
3937
      // Blocks of 3 rows
3938  for( ; (i+3UL) <= M; i+=3UL )
3939  {
3940  const size_t jbegin( ( IsUpper<MT1>::value )
3941  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3942  :( 0UL ) );
3943  const size_t jend( ( IsLower<MT1>::value )
3944  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
3945  :( N ) );
3946  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3947
3948  IntrinsicType xmm1, xmm2, xmm3;
3949
3950  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3951  const IntrinsicType x1( x.load(j) );
3952  xmm1 = xmm1 + A.load(i ,j) * x1;
3953  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3954  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3955  }
3956
3957  y[i ] += sum( xmm1 ) * scalar;
3958  y[i+1UL] += sum( xmm2 ) * scalar;
3959  y[i+2UL] += sum( xmm3 ) * scalar;
3960  }
3961
      // Blocks of 2 rows
3962  for( ; (i+2UL) <= M; i+=2UL )
3963  {
3964  const size_t jbegin( ( IsUpper<MT1>::value )
3965  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3966  :( 0UL ) );
3967  const size_t jend( ( IsLower<MT1>::value )
3968  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
3969  :( N ) );
3970  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3971
3972  IntrinsicType xmm1, xmm2;
3973
3974  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3975  const IntrinsicType x1( x.load(j) );
3976  xmm1 = xmm1 + A.load(i ,j) * x1;
3977  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3978  }
3979
3980  y[i ] += sum( xmm1 ) * scalar;
3981  y[i+1UL] += sum( xmm2 ) * scalar;
3982  }
3983
      // Remaining single row, if any
3984  if( i < M )
3985  {
3986  const size_t jbegin( ( IsUpper<MT1>::value )
3987  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
3988  :( 0UL ) );
3989  const size_t jend( ( IsLower<MT1>::value )
3990  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
3991  :( N ) );
3992  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3993
3994  IntrinsicType xmm1;
3995
3996  for( size_t j=jbegin; j<jend; j+=IT::size ) {
3997  xmm1 = xmm1 + A.load(i,j) * x.load(j);
3998  }
3999
4000  y[i] += sum( xmm1 ) * scalar;
4001  }
4002  }
4003  //**********************************************************************************************
4004 
4005  //**Default addition assignment to dense vectors (large matrices)*******************************
4019  template< typename VT1 // Type of the left-hand side target vector
4020  , typename MT1 // Type of the left-hand side matrix operand
4021  , typename VT2 // Type of the right-hand side vector operand
4022  , typename ST2 > // Type of the scalar value
4023  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4024  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4025  {
4026  selectDefaultAddAssignKernel( y, A, x, scalar );
4027  }
4028  //**********************************************************************************************
4029 
4030  //**Vectorized default addition assignment to dense vectors (large matrices)********************
/*!\brief Vectorized large-matrix kernel for the scaled addition assignment
//        \f$ \vec{y}+=s*A*\vec{x} \f$.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor applied to every reduced partial product.
//
// Guarded by EnableIf< UseVectorizedDefaultKernel<...> >. The rows of \a A are processed in
// blocks of 8, 4 and 2 rows, followed by at most one single remaining row. Inside each row
// block the columns are traversed four intrinsic vectors at a time, then two, then one. After
// every column step the partial products are reduced via sum(), multiplied by \a scalar and
// added directly to the affected elements of \a y.
*/
4044  template< typename VT1 // Type of the left-hand side target vector
4045  , typename MT1 // Type of the left-hand side matrix operand
4046  , typename VT2 // Type of the right-hand side vector operand
4047  , typename ST2 > // Type of the scalar value
4048  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4049  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4050  {
// IT::size is the column stride of a single load() in all loops below
4051  typedef IntrinsicTrait<ElementType> IT;
4052 
4053  const size_t M( A.rows() );
4054  const size_t N( A.columns() );
4055 
4056  size_t i( 0UL );
4057 
// Blocks of eight rows: every loaded vector of x is reused for eight rows of A
4058  for( ; (i+8UL) <= M; i+=8UL )
4059  {
// Column window of this row block. Upper matrices start at the diagonal of the first row
// (one column later if strictly upper), masked down to a multiple of IT::size (assumes
// IT::size is a power of two). Lower matrices stop behind the diagonal of the last row of
// the block (in front of it if strictly lower). Otherwise the full range [0,N) is used.
4060  const size_t jbegin( ( IsUpper<MT1>::value )
4061  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4062  :( 0UL ) );
4063  const size_t jend( ( IsLower<MT1>::value )
4064  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
4065  :( N ) );
4066  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4067 
4068  size_t j( jbegin );
4069 
// Four vectors per step; the loop is entered as long as the fourth vector starts before jend
4070  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4071  const size_t j1( j+IT::size );
4072  const size_t j2( j+IT::size*2UL );
4073  const size_t j3( j+IT::size*3UL );
4074  const IntrinsicType x1( x.load(j ) );
4075  const IntrinsicType x2( x.load(j1) );
4076  const IntrinsicType x3( x.load(j2) );
4077  const IntrinsicType x4( x.load(j3) );
4078  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4079  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4080  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 ) * scalar;
4081  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 ) * scalar;
4082  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 ) * scalar;
4083  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 ) * scalar;
4084  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 ) * scalar;
4085  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 ) * scalar;
4086  }
4087 
// Two vectors per step; entered as long as the second vector starts before jend
4088  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4089  const size_t j1( j+IT::size );
4090  const IntrinsicType x1( x.load(j ) );
4091  const IntrinsicType x2( x.load(j1) );
4092  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4093  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4094  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 ) * scalar;
4095  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 ) * scalar;
4096  y[i+4UL] += sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 ) * scalar;
4097  y[i+5UL] += sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 ) * scalar;
4098  y[i+6UL] += sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 ) * scalar;
4099  y[i+7UL] += sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 ) * scalar;
4100  }
4101 
// At most one vector is left at this point.
// NOTE(review): all loads are full-width even if fewer than IT::size columns remain before
// jend; presumably this relies on padded operand storage - confirm.
4102  if( j < jend ) {
4103  const IntrinsicType x1( x.load(j) );
4104  y[i ] += sum( A.load(i ,j) * x1 ) * scalar;
4105  y[i+1UL] += sum( A.load(i+1UL,j) * x1 ) * scalar;
4106  y[i+2UL] += sum( A.load(i+2UL,j) * x1 ) * scalar;
4107  y[i+3UL] += sum( A.load(i+3UL,j) * x1 ) * scalar;
4108  y[i+4UL] += sum( A.load(i+4UL,j) * x1 ) * scalar;
4109  y[i+5UL] += sum( A.load(i+5UL,j) * x1 ) * scalar;
4110  y[i+6UL] += sum( A.load(i+6UL,j) * x1 ) * scalar;
4111  y[i+7UL] += sum( A.load(i+7UL,j) * x1 ) * scalar;
4112  }
4113  }
4114 
// Blocks of four rows: same column scheme, jend adapted to the last row i+3 of the block
4115  for( ; (i+4UL) <= M; i+=4UL )
4116  {
4117  const size_t jbegin( ( IsUpper<MT1>::value )
4118  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4119  :( 0UL ) );
4120  const size_t jend( ( IsLower<MT1>::value )
4121  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
4122  :( N ) );
4123  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4124 
4125  size_t j( jbegin );
4126 
4127  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4128  const size_t j1( j+IT::size );
4129  const size_t j2( j+IT::size*2UL );
4130  const size_t j3( j+IT::size*3UL );
4131  const IntrinsicType x1( x.load(j ) );
4132  const IntrinsicType x2( x.load(j1) );
4133  const IntrinsicType x3( x.load(j2) );
4134  const IntrinsicType x4( x.load(j3) );
4135  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4136  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4137  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 ) * scalar;
4138  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 ) * scalar;
4139  }
4140 
4141  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4142  const size_t j1( j+IT::size );
4143  const IntrinsicType x1( x.load(j ) );
4144  const IntrinsicType x2( x.load(j1) );
4145  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4146  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4147  y[i+2UL] += sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 ) * scalar;
4148  y[i+3UL] += sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 ) * scalar;
4149  }
4150 
4151  if( j < jend ) {
4152  const IntrinsicType x1( x.load(j) );
4153  y[i ] += sum( A.load(i ,j) * x1 ) * scalar;
4154  y[i+1UL] += sum( A.load(i+1UL,j) * x1 ) * scalar;
4155  y[i+2UL] += sum( A.load(i+2UL,j) * x1 ) * scalar;
4156  y[i+3UL] += sum( A.load(i+3UL,j) * x1 ) * scalar;
4157  }
4158  }
4159 
// Blocks of two rows: same column scheme, jend adapted to the last row i+1 of the block
4160  for( ; (i+2UL) <= M; i+=2UL )
4161  {
4162  const size_t jbegin( ( IsUpper<MT1>::value )
4163  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4164  :( 0UL ) );
4165  const size_t jend( ( IsLower<MT1>::value )
4166  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
4167  :( N ) );
4168  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4169 
4170  size_t j( jbegin );
4171 
4172  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4173  const size_t j1( j+IT::size );
4174  const size_t j2( j+IT::size*2UL );
4175  const size_t j3( j+IT::size*3UL );
4176  const IntrinsicType x1( x.load(j ) );
4177  const IntrinsicType x2( x.load(j1) );
4178  const IntrinsicType x3( x.load(j2) );
4179  const IntrinsicType x4( x.load(j3) );
4180  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4181  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4182  }
4183 
4184  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4185  const size_t j1( j+IT::size );
4186  const IntrinsicType x1( x.load(j ) );
4187  const IntrinsicType x2( x.load(j1) );
4188  y[i ] += sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4189  y[i+1UL] += sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4190  }
4191 
4192  if( j < jend ) {
4193  const IntrinsicType x1( x.load(j) );
4194  y[i ] += sum( A.load(i ,j) * x1 ) * scalar;
4195  y[i+1UL] += sum( A.load(i+1UL,j) * x1 ) * scalar;
4196  }
4197  }
4198 
// At most one row is left after the two-row loop; for strictly lower matrices its column
// range ends in front of the diagonal element (jend == i)
4199  if( i < M )
4200  {
4201  const size_t jbegin( ( IsUpper<MT1>::value )
4202  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4203  :( 0UL ) );
4204  const size_t jend( ( IsLower<MT1>::value )
4205  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
4206  :( N ) );
4207  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4208 
4209  size_t j( jbegin );
4210 
4211  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4212  const size_t j1( j+IT::size );
4213  const size_t j2( j+IT::size*2UL );
4214  const size_t j3( j+IT::size*3UL );
4215  const IntrinsicType x1( x.load(j ) );
4216  const IntrinsicType x2( x.load(j1) );
4217  const IntrinsicType x3( x.load(j2) );
4218  const IntrinsicType x4( x.load(j3) );
4219  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 ) * scalar;
4220  }
4221 
4222  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4223  const size_t j1( j+IT::size );
4224  const IntrinsicType x1( x.load(j ) );
4225  const IntrinsicType x2( x.load(j1) );
4226  y[i] += sum( A.load(i,j) * x1 + A.load(i,j1) * x2 ) * scalar;
4227  }
4228 
4229  if( j < jend ) {
4230  const IntrinsicType x1( x.load(j) );
4231  y[i] += sum( A.load(i,j) * x1 ) * scalar;
4232  }
4233  }
4234  }
4235  //**********************************************************************************************
4236 
4237  //**BLAS-based addition assignment to dense vectors (default)***********************************
4251  template< typename VT1 // Type of the left-hand side target vector
4252  , typename MT1 // Type of the left-hand side matrix operand
4253  , typename VT2 // Type of the right-hand side vector operand
4254  , typename ST2 > // Type of the scalar value
4255  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4256  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4257  {
4258  selectLargeAddAssignKernel( y, A, x, scalar );
4259  }
4260  //**********************************************************************************************
4261 
4262  //**BLAS-based addition assignment to dense vectors (single precision)**************************
4263 #if BLAZE_BLAS_MODE
4264 
4277  template< typename VT1 // Type of the left-hand side target vector
4278  , typename MT1 // Type of the left-hand side matrix operand
4279  , typename VT2 // Type of the right-hand side vector operand
4280  , typename ST2 > // Type of the scalar value
4281  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4282  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4283  {
4284  if( IsTriangular<MT1>::value ) {
4285  typename VT1::ResultType tmp( scalar * x );
4286  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4287  addAssign( y, tmp );
4288  }
4289  else {
4290  sgemv( y, A, x, scalar, 1.0F );
4291  }
4292  }
4293 #endif
4294  //**********************************************************************************************
4295 
4296  //**BLAS-based addition assignment to dense vectors (double precision)**************************
4297 #if BLAZE_BLAS_MODE
4298 
4311  template< typename VT1 // Type of the left-hand side target vector
4312  , typename MT1 // Type of the left-hand side matrix operand
4313  , typename VT2 // Type of the right-hand side vector operand
4314  , typename ST2 > // Type of the scalar value
4315  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4316  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4317  {
4318  if( IsTriangular<MT1>::value ) {
4319  typename VT1::ResultType tmp( scalar * x );
4320  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4321  addAssign( y, tmp );
4322  }
4323  else {
4324  dgemv( y, A, x, scalar, 1.0 );
4325  }
4326  }
4327 #endif
4328  //**********************************************************************************************
4329 
4330  //**BLAS-based addition assignment to dense vectors (single precision complex)******************
4331 #if BLAZE_BLAS_MODE
4332 
4345  template< typename VT1 // Type of the left-hand side target vector
4346  , typename MT1 // Type of the left-hand side matrix operand
4347  , typename VT2 // Type of the right-hand side vector operand
4348  , typename ST2 > // Type of the scalar value
4349  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4350  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4351  {
4352  if( IsTriangular<MT1>::value ) {
4353  typename VT1::ResultType tmp( scalar * x );
4354  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4355  addAssign( y, tmp );
4356  }
4357  else {
4358  cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4359  }
4360  }
4361 #endif
4362  //**********************************************************************************************
4363 
4364  //**BLAS-based addition assignment to dense vectors (double precision complex)******************
4365 #if BLAZE_BLAS_MODE
4366 
4379  template< typename VT1 // Type of the left-hand side target vector
4380  , typename MT1 // Type of the left-hand side matrix operand
4381  , typename VT2 // Type of the right-hand side vector operand
4382  , typename ST2 > // Type of the scalar value
4383  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4384  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4385  {
4386  if( IsTriangular<MT1>::value ) {
4387  typename VT1::ResultType tmp( scalar * x );
4388  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4389  addAssign( y, tmp );
4390  }
4391  else {
4392  zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4393  }
4394  }
4395 #endif
4396  //**********************************************************************************************
4397 
4398  //**Addition assignment to sparse vectors*******************************************************
4399  // No special implementation for the addition assignment to sparse vectors.
4400  //**********************************************************************************************
4401 
4402  //**Subtraction assignment to dense vectors*****************************************************
4414  template< typename VT1 > // Type of the target dense vector
4415  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4416  {
4418 
4419  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4420 
4421  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4422  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4423 
4424  if( left.rows() == 0UL || left.columns() == 0UL ) {
4425  return;
4426  }
4427 
4428  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4429  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
4430 
4431  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4432  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4433  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4434  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4435 
4436  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
4437  }
4438  //**********************************************************************************************
4439 
4440  //**Subtraction assignment to dense vectors (kernel selection)**********************************
4451  template< typename VT1 // Type of the left-hand side target vector
4452  , typename MT1 // Type of the left-hand side matrix operand
4453  , typename VT2 // Type of the right-hand side vector operand
4454  , typename ST2 > // Type of the scalar value
4455  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4456  {
4457  if( ( IsDiagonal<MT1>::value ) ||
4458  ( IsComputation<MT>::value && !evaluateMatrix ) ||
4459  ( A.rows() * A.columns() < DMATDVECMULT_THRESHOLD ) )
4460  selectSmallSubAssignKernel( y, A, x, scalar );
4461  else
4462  selectBlasSubAssignKernel( y, A, x, scalar );
4463  }
4464  //**********************************************************************************************
4465 
4466  //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default kernel for the scaled subtraction assignment
//        \f$ \vec{y}-=s*A*\vec{x} \f$.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor.
// \return void
//
// The kernel builds the expression \a A * \a x * \a scalar and passes it to the subAssign()
// member function of the target vector \a y.
*/
template< typename VT1    // Target vector type (left-hand side)
        , typename MT1    // Matrix operand type (left-hand side)
        , typename VT2    // Vector operand type (right-hand side)
        , typename ST2 >  // Scalar type
static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
{
   // The product is formed left to right: (A * x) first, the scaling second
   y.subAssign( A * x * scalar );
}
4488  //**********************************************************************************************
4489 
4490  //**Default subtraction assignment to dense vectors (small matrices)****************************
4504  template< typename VT1 // Type of the left-hand side target vector
4505  , typename MT1 // Type of the left-hand side matrix operand
4506  , typename VT2 // Type of the right-hand side vector operand
4507  , typename ST2 > // Type of the scalar value
4508  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4509  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4510  {
4511  selectDefaultSubAssignKernel( y, A, x, scalar );
4512  }
4513  //**********************************************************************************************
4514 
4515  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
/*!\brief Vectorized small-matrix kernel for the scaled subtraction assignment
//        \f$ \vec{y}-=s*A*\vec{x} \f$.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor applied to every reduced row result.
//
// Guarded by EnableIf< UseVectorizedDefaultKernel<...> >. The rows of \a A are processed in
// blocks of 8, 4, 3 and 2 rows, followed by at most one single remaining row. For each row
// of a block one intrinsic accumulator collects the products over the complete column range;
// only after the column loop the accumulator is reduced via sum(), multiplied by \a scalar
// and subtracted from the matching element of \a y (one update of \a y per row).
*/
4529  template< typename VT1 // Type of the left-hand side target vector
4530  , typename MT1 // Type of the left-hand side matrix operand
4531  , typename VT2 // Type of the right-hand side vector operand
4532  , typename ST2 > // Type of the scalar value
4533  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4534  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4535  {
// IT::size is the column stride of a single load() in all loops below
4536  typedef IntrinsicTrait<ElementType> IT;
4537 
4538  const size_t M( A.rows() );
4539  const size_t N( A.columns() );
4540 
4541  size_t i( 0UL );
4542 
// Blocks of eight rows: every loaded vector of x is reused for eight rows of A
4543  for( ; (i+8UL) <= M; i+=8UL )
4544  {
// Column window of this row block. Upper matrices start at the diagonal of the first row
// (one column later if strictly upper), masked down to a multiple of IT::size (assumes
// IT::size is a power of two). Lower matrices stop behind the diagonal of the last row of
// the block (in front of it if strictly lower). Otherwise the full range [0,N) is used.
4545  const size_t jbegin( ( IsUpper<MT1>::value )
4546  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4547  :( 0UL ) );
4548  const size_t jend( ( IsLower<MT1>::value )
4549  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
4550  :( N ) );
4551  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4552 
// NOTE(review): the accumulators are default-constructed and immediately used as summands;
// this presumes that a default-constructed IntrinsicType holds zero - confirm.
4553  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4554 
// NOTE(review): every step loads a full vector even if fewer than IT::size columns remain
// before jend; presumably this relies on padded operand storage - confirm.
4555  for( size_t j=jbegin; j<jend; j+=IT::size ) {
4556  const IntrinsicType x1( x.load(j) );
4557  xmm1 = xmm1 + A.load(i ,j) * x1;
4558  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
4559  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
4560  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
4561  xmm5 = xmm5 + A.load(i+4UL,j) * x1;
4562  xmm6 = xmm6 + A.load(i+5UL,j) * x1;
4563  xmm7 = xmm7 + A.load(i+6UL,j) * x1;
4564  xmm8 = xmm8 + A.load(i+7UL,j) * x1;
4565  }
4566 
// Reduce each accumulator, scale the result and subtract it from the target element
4567  y[i ] -= sum( xmm1 ) * scalar;
4568  y[i+1UL] -= sum( xmm2 ) * scalar;
4569  y[i+2UL] -= sum( xmm3 ) * scalar;
4570  y[i+3UL] -= sum( xmm4 ) * scalar;
4571  y[i+4UL] -= sum( xmm5 ) * scalar;
4572  y[i+5UL] -= sum( xmm6 ) * scalar;
4573  y[i+6UL] -= sum( xmm7 ) * scalar;
4574  y[i+7UL] -= sum( xmm8 ) * scalar;
4575  }
4576 
// Blocks of four rows: same scheme, jend adapted to the last row i+3 of the block
4577  for( ; (i+4UL) <= M; i+=4UL )
4578  {
4579  const size_t jbegin( ( IsUpper<MT1>::value )
4580  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4581  :( 0UL ) );
4582  const size_t jend( ( IsLower<MT1>::value )
4583  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
4584  :( N ) );
4585  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4586 
4587  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4588 
4589  for( size_t j=jbegin; j<jend; j+=IT::size ) {
4590  const IntrinsicType x1( x.load(j) );
4591  xmm1 = xmm1 + A.load(i ,j) * x1;
4592  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
4593  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
4594  xmm4 = xmm4 + A.load(i+3UL,j) * x1;
4595  }
4596 
4597  y[i ] -= sum( xmm1 ) * scalar;
4598  y[i+1UL] -= sum( xmm2 ) * scalar;
4599  y[i+2UL] -= sum( xmm3 ) * scalar;
4600  y[i+3UL] -= sum( xmm4 ) * scalar;
4601  }
4602 
// Block of three rows: at most three rows are left after the four-row loop, so this loop
// body runs at most once
4603  for( ; (i+3UL) <= M; i+=3UL )
4604  {
4605  const size_t jbegin( ( IsUpper<MT1>::value )
4606  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4607  :( 0UL ) );
4608  const size_t jend( ( IsLower<MT1>::value )
4609  ?( IsStrictlyLower<MT1>::value ? i+2UL : i+3UL )
4610  :( N ) );
4611  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4612 
4613  IntrinsicType xmm1, xmm2, xmm3;
4614 
4615  for( size_t j=jbegin; j<jend; j+=IT::size ) {
4616  const IntrinsicType x1( x.load(j) );
4617  xmm1 = xmm1 + A.load(i ,j) * x1;
4618  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
4619  xmm3 = xmm3 + A.load(i+2UL,j) * x1;
4620  }
4621 
4622  y[i ] -= sum( xmm1 ) * scalar;
4623  y[i+1UL] -= sum( xmm2 ) * scalar;
4624  y[i+2UL] -= sum( xmm3 ) * scalar;
4625  }
4626 
// Block of two rows: same scheme, jend adapted to the last row i+1 of the block
4627  for( ; (i+2UL) <= M; i+=2UL )
4628  {
4629  const size_t jbegin( ( IsUpper<MT1>::value )
4630  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4631  :( 0UL ) );
4632  const size_t jend( ( IsLower<MT1>::value )
4633  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
4634  :( N ) );
4635  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4636 
4637  IntrinsicType xmm1, xmm2;
4638 
4639  for( size_t j=jbegin; j<jend; j+=IT::size ) {
4640  const IntrinsicType x1( x.load(j) );
4641  xmm1 = xmm1 + A.load(i ,j) * x1;
4642  xmm2 = xmm2 + A.load(i+1UL,j) * x1;
4643  }
4644 
4645  y[i ] -= sum( xmm1 ) * scalar;
4646  y[i+1UL] -= sum( xmm2 ) * scalar;
4647  }
4648 
// Single remaining row; for strictly lower matrices its column range ends in front of the
// diagonal element (jend == i)
4649  if( i < M )
4650  {
4651  const size_t jbegin( ( IsUpper<MT1>::value )
4652  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4653  :( 0UL ) );
4654  const size_t jend( ( IsLower<MT1>::value )
4655  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
4656  :( N ) );
4657  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4658 
4659  IntrinsicType xmm1;
4660 
4661  for( size_t j=jbegin; j<jend; j+=IT::size ) {
4662  xmm1 = xmm1 + A.load(i,j) * x.load(j);
4663  }
4664 
4665  y[i] -= sum( xmm1 ) * scalar;
4666  }
4667  }
4668  //**********************************************************************************************
4669 
4670  //**Default subtraction assignment to dense vectors (large matrices)****************************
4684  template< typename VT1 // Type of the left-hand side target vector
4685  , typename MT1 // Type of the left-hand side matrix operand
4686  , typename VT2 // Type of the right-hand side vector operand
4687  , typename ST2 > // Type of the scalar value
4688  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4689  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4690  {
4691  selectDefaultSubAssignKernel( y, A, x, scalar );
4692  }
4693  //**********************************************************************************************
4694 
4695  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
/*!\brief Vectorized large-matrix kernel for the scaled subtraction assignment
//        \f$ \vec{y}-=s*A*\vec{x} \f$.
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
// \param scalar The scaling factor applied to every reduced partial product.
//
// Guarded by EnableIf< UseVectorizedDefaultKernel<...> >. The rows of \a A are processed in
// blocks of 8, 4 and 2 rows, followed by at most one single remaining row. Inside each row
// block the columns are traversed four intrinsic vectors at a time, then two, then one. After
// every column step the partial products are reduced via sum(), multiplied by \a scalar and
// subtracted directly from the affected elements of \a y.
*/
4709  template< typename VT1 // Type of the left-hand side target vector
4710  , typename MT1 // Type of the left-hand side matrix operand
4711  , typename VT2 // Type of the right-hand side vector operand
4712  , typename ST2 > // Type of the scalar value
4713  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4714  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4715  {
// IT::size is the column stride of a single load() in all loops below
4716  typedef IntrinsicTrait<ElementType> IT;
4717 
4718  const size_t M( A.rows() );
4719  const size_t N( A.columns() );
4720 
4721  size_t i( 0UL );
4722 
// Blocks of eight rows: every loaded vector of x is reused for eight rows of A
4723  for( ; (i+8UL) <= M; i+=8UL )
4724  {
// Column window of this row block. Upper matrices start at the diagonal of the first row
// (one column later if strictly upper), masked down to a multiple of IT::size (assumes
// IT::size is a power of two). Lower matrices stop behind the diagonal of the last row of
// the block (in front of it if strictly lower). Otherwise the full range [0,N) is used.
4725  const size_t jbegin( ( IsUpper<MT1>::value )
4726  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4727  :( 0UL ) );
4728  const size_t jend( ( IsLower<MT1>::value )
4729  ?( IsStrictlyLower<MT1>::value ? i+7UL : i+8UL )
4730  :( N ) );
4731  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4732 
4733  size_t j( jbegin );
4734 
// Four vectors per step; the loop is entered as long as the fourth vector starts before jend
4735  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4736  const size_t j1( j+IT::size );
4737  const size_t j2( j+IT::size*2UL );
4738  const size_t j3( j+IT::size*3UL );
4739  const IntrinsicType x1( x.load(j ) );
4740  const IntrinsicType x2( x.load(j1) );
4741  const IntrinsicType x3( x.load(j2) );
4742  const IntrinsicType x4( x.load(j3) );
4743  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4744  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4745  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 ) * scalar;
4746  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 ) * scalar;
4747  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 + A.load(i+4UL,j2) * x3 + A.load(i+4UL,j3) * x4 ) * scalar;
4748  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 + A.load(i+5UL,j2) * x3 + A.load(i+5UL,j3) * x4 ) * scalar;
4749  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 + A.load(i+6UL,j2) * x3 + A.load(i+6UL,j3) * x4 ) * scalar;
4750  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 + A.load(i+7UL,j2) * x3 + A.load(i+7UL,j3) * x4 ) * scalar;
4751  }
4752 
// Two vectors per step; entered as long as the second vector starts before jend
4753  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4754  const size_t j1( j+IT::size );
4755  const IntrinsicType x1( x.load(j ) );
4756  const IntrinsicType x2( x.load(j1) );
4757  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4758  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4759  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 ) * scalar;
4760  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 ) * scalar;
4761  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 + A.load(i+4UL,j1) * x2 ) * scalar;
4762  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 + A.load(i+5UL,j1) * x2 ) * scalar;
4763  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 + A.load(i+6UL,j1) * x2 ) * scalar;
4764  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 + A.load(i+7UL,j1) * x2 ) * scalar;
4765  }
4766 
// At most one vector is left at this point.
// NOTE(review): all loads are full-width even if fewer than IT::size columns remain before
// jend; presumably this relies on padded operand storage - confirm.
4767  if( j < jend ) {
4768  const IntrinsicType x1( x.load(j) );
4769  y[i ] -= sum( A.load(i ,j) * x1 ) * scalar;
4770  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 ) * scalar;
4771  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 ) * scalar;
4772  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 ) * scalar;
4773  y[i+4UL] -= sum( A.load(i+4UL,j) * x1 ) * scalar;
4774  y[i+5UL] -= sum( A.load(i+5UL,j) * x1 ) * scalar;
4775  y[i+6UL] -= sum( A.load(i+6UL,j) * x1 ) * scalar;
4776  y[i+7UL] -= sum( A.load(i+7UL,j) * x1 ) * scalar;
4777  }
4778  }
4779 
// Blocks of four rows: same column scheme, jend adapted to the last row i+3 of the block
4780  for( ; (i+4UL) <= M; i+=4UL )
4781  {
4782  const size_t jbegin( ( IsUpper<MT1>::value )
4783  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4784  :( 0UL ) );
4785  const size_t jend( ( IsLower<MT1>::value )
4786  ?( IsStrictlyLower<MT1>::value ? i+3UL : i+4UL )
4787  :( N ) );
4788  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4789 
4790  size_t j( jbegin );
4791 
4792  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4793  const size_t j1( j+IT::size );
4794  const size_t j2( j+IT::size*2UL );
4795  const size_t j3( j+IT::size*3UL );
4796  const IntrinsicType x1( x.load(j ) );
4797  const IntrinsicType x2( x.load(j1) );
4798  const IntrinsicType x3( x.load(j2) );
4799  const IntrinsicType x4( x.load(j3) );
4800  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4801  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4802  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 + A.load(i+2UL,j2) * x3 + A.load(i+2UL,j3) * x4 ) * scalar;
4803  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 + A.load(i+3UL,j2) * x3 + A.load(i+3UL,j3) * x4 ) * scalar;
4804  }
4805 
4806  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4807  const size_t j1( j+IT::size );
4808  const IntrinsicType x1( x.load(j ) );
4809  const IntrinsicType x2( x.load(j1) );
4810  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4811  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4812  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 + A.load(i+2UL,j1) * x2 ) * scalar;
4813  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 + A.load(i+3UL,j1) * x2 ) * scalar;
4814  }
4815 
4816  if( j < jend ) {
4817  const IntrinsicType x1( x.load(j) );
4818  y[i ] -= sum( A.load(i ,j) * x1 ) * scalar;
4819  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 ) * scalar;
4820  y[i+2UL] -= sum( A.load(i+2UL,j) * x1 ) * scalar;
4821  y[i+3UL] -= sum( A.load(i+3UL,j) * x1 ) * scalar;
4822  }
4823  }
4824 
// Blocks of two rows: same column scheme, jend adapted to the last row i+1 of the block
4825  for( ; (i+2UL) <= M; i+=2UL )
4826  {
4827  const size_t jbegin( ( IsUpper<MT1>::value )
4828  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4829  :( 0UL ) );
4830  const size_t jend( ( IsLower<MT1>::value )
4831  ?( IsStrictlyLower<MT1>::value ? i+1UL : i+2UL )
4832  :( N ) );
4833  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4834 
4835  size_t j( jbegin );
4836 
4837  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4838  const size_t j1( j+IT::size );
4839  const size_t j2( j+IT::size*2UL );
4840  const size_t j3( j+IT::size*3UL );
4841  const IntrinsicType x1( x.load(j ) );
4842  const IntrinsicType x2( x.load(j1) );
4843  const IntrinsicType x3( x.load(j2) );
4844  const IntrinsicType x4( x.load(j3) );
4845  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 + A.load(i ,j2) * x3 + A.load(i ,j3) * x4 ) * scalar;
4846  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 + A.load(i+1UL,j2) * x3 + A.load(i+1UL,j3) * x4 ) * scalar;
4847  }
4848 
4849  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4850  const size_t j1( j+IT::size );
4851  const IntrinsicType x1( x.load(j ) );
4852  const IntrinsicType x2( x.load(j1) );
4853  y[i ] -= sum( A.load(i ,j) * x1 + A.load(i ,j1) * x2 ) * scalar;
4854  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 + A.load(i+1UL,j1) * x2 ) * scalar;
4855  }
4856 
4857  if( j < jend ) {
4858  const IntrinsicType x1( x.load(j) );
4859  y[i ] -= sum( A.load(i ,j) * x1 ) * scalar;
4860  y[i+1UL] -= sum( A.load(i+1UL,j) * x1 ) * scalar;
4861  }
4862  }
4863 
// At most one row is left after the two-row loop; for strictly lower matrices its column
// range ends in front of the diagonal element (jend == i)
4864  if( i < M )
4865  {
4866  const size_t jbegin( ( IsUpper<MT1>::value )
4867  ?( ( IsStrictlyUpper<MT1>::value ? i+1UL : i ) & size_t(-IT::size) )
4868  :( 0UL ) );
4869  const size_t jend( ( IsLower<MT1>::value )
4870  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
4871  :( N ) );
4872  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4873 
4874  size_t j( jbegin );
4875 
4876  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL ) {
4877  const size_t j1( j+IT::size );
4878  const size_t j2( j+IT::size*2UL );
4879  const size_t j3( j+IT::size*3UL );
4880  const IntrinsicType x1( x.load(j ) );
4881  const IntrinsicType x2( x.load(j1) );
4882  const IntrinsicType x3( x.load(j2) );
4883  const IntrinsicType x4( x.load(j3) );
4884  y[i] -= sum( A.load(i,j) * x1 + A.load(i,j1) * x2 + A.load(i,j2) * x3 + A.load(i,j3) * x4 ) * scalar;
4885  }
4886 
4887  for( ; (j+IT::size) < jend; j+=IT::size*2UL ) {
4888  const size_t j1( j+IT::size );
4889  const IntrinsicType x1( x.load(j ) );
4890  const IntrinsicType x2( x.load(j1) );
4891  y[i] -= sum( A.load(i,j) * x1 + A.load(i,j1) * x2 ) * scalar;
4892  }
4893 
4894  if( j < jend ) {
4895  const IntrinsicType x1( x.load(j) );
4896  y[i] -= sum( A.load(i,j) * x1 ) * scalar;
4897  }
4898  }
4899  }
4900  //**********************************************************************************************
4901 
4902  //**BLAS-based subtraction assignment to dense vectors (default)********************************
4916  template< typename VT1 // Type of the left-hand side target vector
4917  , typename MT1 // Type of the left-hand side matrix operand
4918  , typename VT2 // Type of the right-hand side vector operand
4919  , typename ST2 > // Type of the scalar value
4920  static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4921  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4922  {
4923  selectLargeSubAssignKernel( y, A, x, scalar );
4924  }
4925  //**********************************************************************************************
4926 
4927  //**BLAS-based subtraction assignment to dense vectors (single precision)***********************
4928 #if BLAZE_BLAS_MODE
4929 
// Single precision overload of the BLAS-based subtraction-assignment kernel, enabled through
// EnableIf when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds. Only compiled when
// BLAZE_BLAS_MODE is set (see the surrounding #if).
//   y      - target vector that is updated
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
4942  template< typename VT1 // Type of the left-hand side target vector
4943  , typename MT1 // Type of the left-hand side matrix operand
4944  , typename VT2 // Type of the right-hand side vector operand
4945  , typename ST2 > // Type of the scalar value
4946  static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4947  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4948  {
// Triangular matrix types take the trmv route: a temporary is seeded with scalar * x, handed to
// strmv together with A and the lower/upper flag, and finally subtract-assigned to y.
// NOTE(review): strmv presumably overwrites tmp with A * tmp - confirm in blaze/math/blas/Level2.h.
4949  if( IsTriangular<MT1>::value ) {
4950  typename VT1::ResultType tmp( scalar * x );
4951  strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4952  subAssign( y, tmp );
4953  }
4954  else {
// NOTE(review): -scalar and 1.0F look like the alpha/beta factors of gemv (y = alpha*A*x + beta*y),
// which would fold the subtraction into a single call - confirm against the sgemv wrapper.
4955  sgemv( y, A, x, -scalar, 1.0F );
4956  }
4957  }
4958 #endif
4959  //**********************************************************************************************
4960 
4961  //**BLAS-based subtraction assignment to dense vectors (double precision)***********************
4962 #if BLAZE_BLAS_MODE
4963 
// Double precision overload of the BLAS-based subtraction-assignment kernel, enabled through
// EnableIf when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds. Only compiled when
// BLAZE_BLAS_MODE is set (see the surrounding #if).
//   y      - target vector that is updated
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
4976  template< typename VT1 // Type of the left-hand side target vector
4977  , typename MT1 // Type of the left-hand side matrix operand
4978  , typename VT2 // Type of the right-hand side vector operand
4979  , typename ST2 > // Type of the scalar value
4980  static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4981  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4982  {
// Triangular matrix types take the trmv route: a temporary is seeded with scalar * x, handed to
// dtrmv together with A and the lower/upper flag, and finally subtract-assigned to y.
// NOTE(review): dtrmv presumably overwrites tmp with A * tmp - confirm in blaze/math/blas/Level2.h.
4983  if( IsTriangular<MT1>::value ) {
4984  typename VT1::ResultType tmp( scalar * x );
4985  dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4986  subAssign( y, tmp );
4987  }
4988  else {
// NOTE(review): -scalar and 1.0 look like the alpha/beta factors of gemv (y = alpha*A*x + beta*y),
// which would fold the subtraction into a single call - confirm against the dgemv wrapper.
4989  dgemv( y, A, x, -scalar, 1.0 );
4990  }
4991  }
4992 #endif
4993  //**********************************************************************************************
4994 
4995  //**BLAS-based subtraction assignment to dense vectors (single precision complex)***************
4996 #if BLAZE_BLAS_MODE
4997 
// Single precision complex overload of the BLAS-based subtraction-assignment kernel, enabled
// through EnableIf when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds (the scalar type ST2
// is not part of that condition). Only compiled when BLAZE_BLAS_MODE is set.
//   y      - target vector that is updated
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
5010  template< typename VT1 // Type of the left-hand side target vector
5011  , typename MT1 // Type of the left-hand side matrix operand
5012  , typename VT2 // Type of the right-hand side vector operand
5013  , typename ST2 > // Type of the scalar value
5014  static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
5015  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
5016  {
// Triangular matrix types take the trmv route: a temporary is seeded with scalar * x, handed to
// ctrmv together with A and the lower/upper flag, and finally subtract-assigned to y.
// NOTE(review): ctrmv presumably overwrites tmp with A * tmp - confirm in blaze/math/blas/Level2.h.
5017  if( IsTriangular<MT1>::value ) {
5018  typename VT1::ResultType tmp( scalar * x );
5019  ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
5020  subAssign( y, tmp );
5021  }
5022  else {
// The negated scalar and the constant one are both wrapped as complex<float> values with a zero
// imaginary part before being passed on.
// NOTE(review): they look like the alpha/beta factors of gemv - confirm against the cgemv wrapper.
5023  cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
5024  }
5025  }
5026 #endif
5027  //**********************************************************************************************
5028 
5029  //**BLAS-based subtraction assignment to dense vectors (double precision complex)***************
5030 #if BLAZE_BLAS_MODE
5031 
// Double precision complex overload of the BLAS-based subtraction-assignment kernel, enabled
// through EnableIf when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds (the scalar type ST2
// is not part of that condition). Only compiled when BLAZE_BLAS_MODE is set.
//   y      - target vector that is updated
//   A      - matrix operand
//   x      - vector operand
//   scalar - scaling factor
5044  template< typename VT1 // Type of the left-hand side target vector
5045  , typename MT1 // Type of the left-hand side matrix operand
5046  , typename VT2 // Type of the right-hand side vector operand
5047  , typename ST2 > // Type of the scalar value
5048  static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
5049  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
5050  {
// Triangular matrix types take the trmv route: a temporary is seeded with scalar * x, handed to
// ztrmv together with A and the lower/upper flag, and finally subtract-assigned to y.
// NOTE(review): ztrmv presumably overwrites tmp with A * tmp - confirm in blaze/math/blas/Level2.h.
5051  if( IsTriangular<MT1>::value ) {
5052  typename VT1::ResultType tmp( scalar * x );
5053  ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
5054  subAssign( y, tmp );
5055  }
5056  else {
// The negated scalar and the constant one are both wrapped as complex<double> values with a zero
// imaginary part before being passed on.
// NOTE(review): they look like the alpha/beta factors of gemv - confirm against the zgemv wrapper.
5057  zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
5058  }
5059  }
5060 #endif
5061  //**********************************************************************************************
5062 
5063  //**Subtraction assignment to sparse vectors****************************************************
5064  // No special implementation for the subtraction assignment to sparse vectors.
5065  //**********************************************************************************************
5066 
5067  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a scaled matrix/vector product expression to a dense vector.
// The expression is first evaluated into a temporary of type ResultType, with serial() forcing
// that evaluation to run serially, and the temporary is then multiplication-assigned to the
// target.
//   lhs - target dense vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to be multiplied into lhs
// NOTE(review): the listing numbers skip 5082 and 5084-5086, so statements of the original
// source (presumably trace/constraint macros) are not visible here.
5079  template< typename VT1 > // Type of the target dense vector
5080  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5081  {
5083 
5087 
5088  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5089 
5090  const ResultType tmp( serial( rhs ) );
5091  multAssign( ~lhs, tmp );
5092  }
5093  //**********************************************************************************************
5094 
5095  //**Multiplication assignment to sparse vectors*************************************************
5096  // No special implementation for the multiplication assignment to sparse vectors.
5097  //**********************************************************************************************
5098 
5099  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of a scaled matrix/vector product expression to a dense vector. The overload
// is enabled through EnableIf when UseSMPAssign<VT1> holds.
//   lhs - target dense vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to assign; its matrix/vector product is read from rhs.vector_ and its
//         scaling factor from rhs.scalar_
5113  template< typename VT1 > // Type of the target dense vector
5114  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5115  smpAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5116  {
5118 
5119  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5120 
5121  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
5122  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
5123 
// Degenerate shapes are handled up front: with zero rows the function returns without touching
// the target, with zero columns the target is reset before returning.
5124  if( left.rows() == 0UL ) {
5125  return;
5126  }
5127  else if( left.columns() == 0UL ) {
5128  reset( ~lhs );
5129  return;
5130  }
5131 
5132  LT A( left ); // Evaluation of the left-hand side dense matrix operand
5133  RT x( right ); // Evaluation of the right-hand side dense vector operand
5134 
5135  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5136  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
5137  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
5138  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
5139 
// The product is rebuilt from the evaluated operands A and x and handed on to smpAssign again.
5140  smpAssign( ~lhs, A * x * rhs.scalar_ );
5141  }
5142  //**********************************************************************************************
5143 
5144  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of a scaled matrix/vector product expression to a sparse vector. The overload
// is enabled through EnableIf when UseSMPAssign<VT1> holds. The expression is evaluated into a
// temporary of type ResultType, which is then SMP-assigned to the target.
//   lhs - target sparse vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to assign
// NOTE(review): the listing numbers skip 5162 and 5164-5166, so statements of the original
// source (presumably trace/constraint macros) are not visible here.
5158  template< typename VT1 > // Type of the target sparse vector
5159  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5160  smpAssign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5161  {
5163 
5167 
5168  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5169 
5170  const ResultType tmp( rhs );
5171  smpAssign( ~lhs, tmp );
5172  }
5173  //**********************************************************************************************
5174 
5175  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of a scaled matrix/vector product expression to a dense vector. The
// overload is enabled through EnableIf when UseSMPAssign<VT1> holds.
//   lhs - target dense vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to add; its matrix/vector product is read from rhs.vector_ and its
//         scaling factor from rhs.scalar_
5189  template< typename VT1 > // Type of the target dense vector
5190  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5191  smpAddAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5192  {
5194 
5195  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5196 
5197  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
5198  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
5199 
// A matrix operand with zero rows or zero columns leaves the target untouched.
5200  if( left.rows() == 0UL || left.columns() == 0UL ) {
5201  return;
5202  }
5203 
5204  LT A( left ); // Evaluation of the left-hand side dense matrix operand
5205  RT x( right ); // Evaluation of the right-hand side dense vector operand
5206 
5207  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5208  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
5209  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
5210  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
5211 
// The product is rebuilt from the evaluated operands A and x and handed on to smpAddAssign again.
5212  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
5213  }
5214  //**********************************************************************************************
5215 
5216  //**SMP addition assignment to sparse vectors***************************************************
5217  // No special implementation for the SMP addition assignment to sparse vectors.
5218  //**********************************************************************************************
5219 
5220  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of a scaled matrix/vector product expression to a dense vector.
// The overload is enabled through EnableIf when UseSMPAssign<VT1> holds.
//   lhs - target dense vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to subtract; its matrix/vector product is read from rhs.vector_ and its
//         scaling factor from rhs.scalar_
5234  template< typename VT1 > // Type of the target dense vector
5235  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5236  smpSubAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5237  {
5239 
5240  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5241 
5242  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
5243  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
5244 
// A matrix operand with zero rows or zero columns leaves the target untouched.
5245  if( left.rows() == 0UL || left.columns() == 0UL ) {
5246  return;
5247  }
5248 
5249  LT A( left ); // Evaluation of the left-hand side dense matrix operand
5250  RT x( right ); // Evaluation of the right-hand side dense vector operand
5251 
5252  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5253  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
5254  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
5255  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
5256 
// The product is rebuilt from the evaluated operands A and x and handed on to smpSubAssign again.
5257  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
5258  }
5259  //**********************************************************************************************
5260 
5261  //**SMP subtraction assignment to sparse vectors************************************************
5262  // No special implementation for the SMP subtraction assignment to sparse vectors.
5263  //**********************************************************************************************
5264 
5265  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of a scaled matrix/vector product expression to a dense vector.
// The overload is enabled through EnableIf when UseSMPAssign<VT1> holds. The expression is
// evaluated into a temporary of type ResultType, which is then SMP-multiplication-assigned to
// the target.
//   lhs - target dense vector; must already have the same size as rhs (internal assertion)
//   rhs - expression to be multiplied into lhs
// NOTE(review): the listing numbers skip 5283 and 5285-5287, so statements of the original
// source (presumably trace/constraint macros) are not visible here.
5279  template< typename VT1 > // Type of the target dense vector
5280  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5281  smpMultAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
5282  {
5284 
5288 
5289  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
5290 
5291  const ResultType tmp( rhs );
5292  smpMultAssign( ~lhs, tmp );
5293  }
5294  //**********************************************************************************************
5295 
5296  //**SMP multiplication assignment to sparse vectors*********************************************
5297  // No special implementation for the SMP multiplication assignment to sparse vectors.
5298  //**********************************************************************************************
5299 
5300  //**Compile time checks*************************************************************************
// Compile-time guard: ST and RightOperand must name the same type.
// NOTE(review): the listing numbers skip 5301-5307, so further constraints of the original
// source may precede this one without being visible here.
5308  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
5309  //**********************************************************************************************
5310 };
5312 //*************************************************************************************************
5313 
5314 
5315 
5316 
5317 //=================================================================================================
5318 //
5319 // GLOBAL BINARY ARITHMETIC OPERATORS
5320 //
5321 //=================================================================================================
5322 
5323 //*************************************************************************************************
// Multiplication of a dense matrix with a dense vector. Throws std::invalid_argument when the
// number of matrix columns differs from the vector size; otherwise returns a
// DMatDVecMultExpr<T1,T2> expression object built from the two operands.
// The return type goes through DisableIf< IsMatMatMultExpr<T1>, ... >, so this overload is not
// meant for left-hand operands that are matrix/matrix multiplication expressions.
// NOTE(review): the listing jumps from 5355 to 5357, i.e. the declarator line (function name and
// the parameter list declaring `mat` and `vec`) is missing from this listing and has to be
// restored from the original header before the code can compile. Line 5358 is missing as well.
5353 template< typename T1 // Type of the left-hand side dense matrix
5354  , typename T2 > // Type of the right-hand side dense vector
5355 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
5357 {
5359 
5360  if( (~mat).columns() != (~vec).size() )
5361  throw std::invalid_argument( "Matrix and vector sizes do not match" );
5362 
5363  return DMatDVecMultExpr<T1,T2>( ~mat, ~vec );
5364 }
5365 //*************************************************************************************************
5366 
5367 
5368 
5369 
5370 //=================================================================================================
5371 //
5372 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
5373 //
5374 //=================================================================================================
5375 
5376 //*************************************************************************************************
// Restructuring multiplication for a left-hand operand that is itself a matrix/matrix
// multiplication expression (the return type is enabled through
// EnableIf< IsMatMatMultExpr<T1>, ... >). Instead of evaluating (A*B)*v, the operands are
// regrouped into A*(B*v): the right operand of `mat` is multiplied with `vec` first and the
// left operand of `mat` is applied to that result.
// NOTE(review): the listing jumps from 5392 to 5394, i.e. the declarator line (function name and
// the parameter list declaring `mat` and `vec`) is missing from this listing and has to be
// restored from the original header before the code can compile. Lines 5395 and 5397 are missing
// as well.
5389 template< typename T1 // Type of the left-hand side dense matrix
5390  , bool SO // Storage order of the left-hand side dense matrix
5391  , typename T2 > // Type of the right-hand side dense vector
5392 inline const typename EnableIf< IsMatMatMultExpr<T1>, typename MultExprTrait<T1,T2>::Type >::Type
5394 {
5396 
5398 
5399  return (~mat).leftOperand() * ( (~mat).rightOperand() * vec );
5400 }
5401 //*************************************************************************************************
5402 
5403 
5404 
5405 
5406 //=================================================================================================
5407 //
5408 // SIZE SPECIALIZATIONS
5409 //
5410 //=================================================================================================
5411 
5412 //*************************************************************************************************
// Size specialization for DMatDVecMultExpr: the trait inherits everything from Rows<MT>, i.e.
// the size reported for the product vector is whatever Rows reports for the matrix type.
5414 template< typename MT, typename VT >
5415 struct Size< DMatDVecMultExpr<MT,VT> >
5416  : public Rows<MT>
5417 {};
5419 //*************************************************************************************************
5420 
5421 
5422 
5423 
5424 //=================================================================================================
5425 //
5426 // EXPRESSION TRAIT SPECIALIZATIONS
5427 //
5428 //=================================================================================================
5429 
5430 //*************************************************************************************************
// SubvectorExprTrait specialization for DMatDVecMultExpr. The nested Type is the
// multiplication expression type (MultExprTrait) formed from the submatrix expression type of
// `const MT` and the subvector expression type of `const VT`, both taken with the same flag AF.
// NOTE(review): AF is presumably the alignment flag of the subvector/submatrix views - confirm.
5432 template< typename MT, typename VT, bool AF >
5433 struct SubvectorExprTrait< DMatDVecMultExpr<MT,VT>, AF >
5434 {
5435  public:
5436  //**********************************************************************************************
5437  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type
5438  , typename SubvectorExprTrait<const VT,AF>::Type >::Type Type;
5439  //**********************************************************************************************
5440 };
5442 //*************************************************************************************************
5443 
5444 } // namespace blaze
5445 
5446 #endif
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:284
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:386
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices (\( A = B * C \)).
Definition: DMatDMatMultExpr.h:8247
Header file for basic type definitions.
DMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the DMatDVecMultExpr class.
Definition: DMatDVecMultExpr.h:312
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
Expression object for dense matrix-dense vector multiplications. The DMatDVecMultExpr class represents...
Definition: DMatDVecMultExpr.h:113
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:119
Header file for the IsSame and IsStrictlySame type traits.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDVecMultExpr.h:420
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDVecMultExpr.h:273
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
const size_t SMP_DMATDVECMULT_THRESHOLD
SMP row-major dense matrix/dense vector multiplication threshold. This threshold specifies when a row-...
Definition: Thresholds.h:322
Compile time check for double precision floating point types.This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
DMatDVecMultExpr< MT, VT > This
Type of this DMatDVecMultExpr instance.
Definition: DMatDVecMultExpr.h:272
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
size_t size() const
Returns the current size/dimension of the vector.
Definition: DMatDVecMultExpr.h:366
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:124
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:121
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:290
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:123
Header file for the IsLower type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:376
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDVecMultExpr.h:430
Constraint on the data type.
Base class for all matrix/vector multiplication expression templates. The MatVecMultExpr class serves ...
Definition: MatVecMultExpr.h:66
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:287
Constraints on the storage order of matrix types.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDVecMultExpr.h:275
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDVecMultExpr.h:440
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: DMatDVecMultExpr.h:441
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDVecMultExpr.h:398
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:120
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDVecMultExpr.h:278
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:281
Constraint on the data type.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDVecMultExpr.h:410
Header file for the HasMutableDataAccess type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDVecMultExpr.h:274
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDVecMultExpr.h:276
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDVecMultExpr.h:277
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: DMatDVecMultExpr.h:326
Header file for the IsUpper type trait.
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types.This type trait tests whether or not the...
Definition: IsFloat.h:75
const size_t DMATDVECMULT_THRESHOLD
Row-major dense matrix/dense vector multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:57
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:122
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849